ut2,$rndkey1); &aesenc ($inout3,$rndkey1); &aesenc ($inout4,$rndkey1); &aesenc ($inout5,$rndkey1); &call (&label("_aesni_encrypt6_enter")); &movups ($rndkey1,&QWP(0,$inp)); &movups ($rndkey0,&QWP(0x10,$inp)); &xorps ($inout0,$rndkey1); &movups ($rndkey1,&QWP(0x20,$inp)); &xorps ($inout1,$rndkey0); &movups (&QWP(0,$out),$inout0); &movdqa ($rndkey0,&QWP(16,"esp")); # load increment &xorps ($inout2,$rndkey1); &movdqa ($rndkey1,&QWP(64,"esp")); # load 2nd triplet &movups (&QWP(0x10,$out),$inout1); &movups (&QWP(0x20,$out),$inout2); &paddd ($rndkey1,$rndkey0); # 2nd triplet increment &paddd ($rndkey0,&QWP(48,"esp")); # 1st triplet increment &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask &movups ($inout1,&QWP(0x30,$inp)); &movups ($inout2,&QWP(0x40,$inp)); &xorps ($inout3,$inout1); &movups ($inout1,&QWP(0x50,$inp)); &lea ($inp,&DWP(0x60,$inp)); &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet &pshufb ($rndkey0,$inout0); # byte swap &xorps ($inout4,$inout2); &movups (&QWP(0x30,$out),$inout3); &xorps ($inout5,$inout1); &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet &pshufb ($rndkey1,$inout0); # byte swap &movups (&QWP(0x40,$out),$inout4); &pshufd ($inout0,$rndkey0,3<<6); &movups (&QWP(0x50,$out),$inout5); &lea ($out,&DWP(0x60,$out)); &pshufd ($inout1,$rndkey0,2<<6); &sub ($len,6); &jnc (&label("ctr32_loop6")); &add ($len,6); &jz (&label("ctr32_ret")); &movdqu ($inout5,&QWP(0,$key_)); &mov ($key,$key_); &pxor ($inout5,&QWP(32,"esp")); # restore count-less ivec &mov ($rounds,&DWP(240,$key_)); # restore $rounds &set_label("ctr32_tail"); &por ($inout0,$inout5); &cmp ($len,2); &jb (&label("ctr32_one")); &pshufd ($inout2,$rndkey0,1<<6); &por ($inout1,$inout5); &je (&label("ctr32_two")); &pshufd ($inout3,$rndkey1,3<<6); &por ($inout2,$inout5); &cmp ($len,4); &jb (&label("ctr32_three")); &pshufd ($inout4,$rndkey1,2<<6); &por ($inout3,$inout5); &je (&label("ctr32_four")); &por ($inout4,$inout5); &call ("_aesni_encrypt6"); &movups ($rndkey1,&QWP(0,$inp)); &movups ($rndkey0,&QWP(0x10,$inp)); &xorps ($inout0,$rndkey1); &movups ($rndkey1,&QWP(0x20,$inp)); &xorps ($inout1,$rndkey0); &movups ($rndkey0,&QWP(0x30,$inp)); &xorps ($inout2,$rndkey1); &movups ($rndkey1,&QWP(0x40,$inp)); &xorps ($inout3,$rndkey0); &movups (&QWP(0,$out),$inout0); &xorps ($inout4,$rndkey1); &movups (&QWP(0x10,$out),$inout1); &movups (&QWP(0x20,$out),$inout2); &movups (&QWP(0x30,$out),$inout3); &movups (&QWP(0x40,$out),$inout4); &jmp (&label("ctr32_ret")); &set_label("ctr32_one_shortcut",16); &movups ($inout0,&QWP(0,$rounds_)); # load ivec &mov ($rounds,&DWP(240,$key)); &set_label("ctr32_one"); if ($inline) { &aesni_inline_generate1("enc"); } else { &call ("_aesni_encrypt1"); } &movups ($in0,&QWP(0,$inp)); &xorps ($in0,$inout0); &movups (&QWP(0,$out),$in0); &jmp (&label("ctr32_ret")); &set_label("ctr32_two",16); &call ("_aesni_encrypt2"); &movups ($inout3,&QWP(0,$inp)); &movups ($inout4,&QWP(0x10,$inp)); &xorps ($inout0,$inout3); &xorps ($inout1,$inout4); &movups (&QWP(0,$out),$inout0); &movups (&QWP(0x10,$out),$inout1); &jmp (&label("ctr32_ret")); &set_label("ctr32_three",16); &call ("_aesni_encrypt3"); &movups ($inout3,&QWP(0,$inp)); &movups ($inout4,&QWP(0x10,$inp)); &xorps ($inout0,$inout3); &movups ($inout5,&QWP(0x20,$inp)); &xorps ($inout1,$inout4); &movups (&QWP(0,$out),$inout0); &xorps ($inout2,$inout5); &movups (&QWP(0x10,$out),$inout1); &movups (&QWP(0x20,$out),$inout2); &jmp (&label("ctr32_ret")); &set_label("ctr32_four",16); &call ("_aesni_encrypt4"); &movups ($inout4,&QWP(0,$inp)); &movups ($inout5,&QWP(0x10,$inp)); &movups ($rndkey1,&QWP(0x20,$inp)); &xorps ($inout0,$inout4); &movups ($rndkey0,&QWP(0x30,$inp)); &xorps ($inout1,$inout5); &movups (&QWP(0,$out),$inout0); &xorps ($inout2,$rndkey1); &movups (&QWP(0x10,$out),$inout1); &xorps ($inout3,$rndkey0); &movups (&QWP(0x20,$out),$inout2); &movups (&QWP(0x30,$out),$inout3); &set_label("ctr32_ret"); &pxor ("xmm0","xmm0"); # clear register bank &pxor ("xmm1","xmm1"); &pxor ("xmm2","xmm2"); &pxor ("xmm3","xmm3"); &pxor ("xmm4","xmm4"); &movdqa (&QWP(32,"esp"),"xmm0"); # clear stack &pxor ("xmm5","xmm5"); &movdqa (&QWP(48,"esp"),"xmm0"); &pxor ("xmm6","xmm6"); &movdqa (&QWP(64,"esp"),"xmm0"); &pxor ("xmm7","xmm7"); &mov ("esp",&DWP(80,"esp")); &function_end("${PREFIX}_ctr32_encrypt_blocks"); }