${p}last ($inout1,$rndkey0)"; &ret(); &function_end_B("_aesni_${p}rypt2"); }

# Emit the internal routine _aesni_${p}rypt3, which pushes three blocks
# ($inout0..$inout2) through the cipher side by side.
#
# $p is the direction infix that gets spliced into the routine name, the
# loop label and the aes${p}/aes${p}last mnemonics (callers in this file
# pass "enc"; presumably "dec" is the other accepted value -- confirm).
#
# The emitted loop handles two round keys per iteration, alternating
# between $rndkey1 and $rndkey0, and indexes the key schedule with a
# $rounds value that counts up towards zero.
sub aesni_generate3
{
    my $p = shift;

    my $fn_name    = "_aesni_${p}rypt3";
    my $loop_label = "${p}3_loop";
    my $round_op   = "aes${p}";
    my $final_op   = "aes${p}last";
    my @blocks     = ($inout0, $inout1, $inout2);

    # Emit one instruction per block, in block order, all with the same
    # round-key operand.  String eval is retained because the mnemonic is
    # only known at generation time.
    my $emit = sub {
        my ($mnemonic, $roundkey) = @_;
        foreach my $blk (@blocks) {
            eval "&${mnemonic} ($blk,$roundkey)";
        }
    };

    &function_begin_B($fn_name);

    # Load the first two round keys and scale $rounds by 16.
    &$movekey ($rndkey0, &QWP(0,$key));
    &shl      ($rounds, 4);
    &$movekey ($rndkey1, &QWP(16,$key));

    # Whitening: xor round key 0 into every block (xorps for the first
    # block, pxor for the remaining ones).
    &xorps    ($inout0, $rndkey0);
    foreach my $blk (@blocks[1 .. $#blocks]) {
        &pxor ($blk, $rndkey0);
    }

    # Fetch the third round key, advance $key by 32+$rounds and turn
    # $rounds into a negative offset (biased by 16) relative to it.
    &$movekey ($rndkey0, &QWP(32,$key));
    &lea      ($key, &DWP(32,$key,$rounds));
    &neg      ($rounds);
    &add      ($rounds, 16);

    &set_label($loop_label);
    $emit->($round_op, $rndkey1);
    &$movekey ($rndkey1, &QWP(0,$key,$rounds));
    &add      ($rounds, 32);
    $emit->($round_op, $rndkey0);
    &$movekey ($rndkey0, &QWP(-16,$key,$rounds));
    # The branch consumes the flags produced by the add above.
    &jnz      (&label($loop_label));

    # Tail: one more regular round with $rndkey1, then the final round
    # with $rndkey0.
    $emit->($round_op, $rndkey1);
    $emit->($final_op, $rndkey0);

    &ret();
    &function_end_B($fn_name);
}

# 4x interleave is implemented to improve small block performance,
# most notably [and naturally] 4 block by ~30%. One can argue that one
# should have implemented 5x as well, but improvement would be <20%,
# so it's not worth it...
# Emit the internal routine _aesni_${p}rypt4, which pushes four blocks
# ($inout0..$inout3) through the cipher side by side.
#
# $p is the direction infix spliced into the routine name, the loop label
# and the aes${p}/aes${p}last mnemonics (the calls at the bottom of this
# section pass "enc").
sub aesni_generate4
{
    my $p = shift;

    my $fn_name    = "_aesni_${p}rypt4";
    my $loop_label = "${p}4_loop";
    my $round_op   = "aes${p}";
    my $final_op   = "aes${p}last";
    my @blocks     = ($inout0, $inout1, $inout2, $inout3);

    # Emit one instruction per block, in block order, all with the same
    # round-key operand.
    my $emit = sub {
        my ($mnemonic, $roundkey) = @_;
        foreach my $blk (@blocks) {
            eval "&${mnemonic} ($blk,$roundkey)";
        }
    };

    &function_begin_B($fn_name);

    # Load the first two round keys and scale $rounds by 16.
    &$movekey ($rndkey0, &QWP(0,$key));
    &$movekey ($rndkey1, &QWP(16,$key));
    &shl      ($rounds, 4);

    # Whitening: xor round key 0 into every block (xorps for the first
    # block, pxor for the remaining ones).
    &xorps    ($inout0, $rndkey0);
    foreach my $blk (@blocks[1 .. $#blocks]) {
        &pxor ($blk, $rndkey0);
    }

    # Fetch the third round key, advance $key by 32+$rounds and turn
    # $rounds into a negative offset (biased by 16) relative to it.
    &$movekey ($rndkey0, &QWP(32,$key));
    &lea      ($key, &DWP(32,$key,$rounds));
    &neg      ($rounds);
    # Raw bytes 0f 1f 40 00; NOTE(review): looks like a 4-byte NOP used
    # as padding ahead of the loop -- confirm.
    &data_byte(0x0f, 0x1f, 0x40, 0x00);
    &add      ($rounds, 16);

    &set_label($loop_label);
    $emit->($round_op, $rndkey1);
    &$movekey ($rndkey1, &QWP(0,$key,$rounds));
    &add      ($rounds, 32);
    $emit->($round_op, $rndkey0);
    &$movekey ($rndkey0, &QWP(-16,$key,$rounds));
    # The branch consumes the flags produced by the add above.
    &jnz      (&label($loop_label));

    # Tail: one more regular round with $rndkey1, then the final round
    # with $rndkey0.
    $emit->($round_op, $rndkey1);
    $emit->($final_op, $rndkey0);

    &ret();
    &function_end_B($fn_name);
}

# Emit the internal routine _aesni_${p}rypt6, which pushes six blocks
# ($inout0..$inout5) through the cipher side by side.
#
# $p is the direction infix spliced into the routine name, the labels and
# the aes${p}/aes${p}last mnemonics.
#
# Unlike the 3x/4x generators, the prologue here interleaves the first
# round (with $rndkey1) for blocks 0..2 with the whitening of the later
# blocks, then jumps into the middle of the loop body to finish that round
# for blocks 3..5.  The statement order in the prologue is deliberate and
# is preserved exactly.
sub aesni_generate6
{
    my $p = shift;

    my $fn_name     = "_aesni_${p}rypt6";
    my $loop_label  = "${p}6_loop";
    my $inner_label = "_aesni_${p}rypt6_inner";
    my $enter_label = "_aesni_${p}rypt6_enter";
    my $round_op    = "aes${p}";
    my $final_op    = "aes${p}last";
    my @first_half  = ($inout0, $inout1, $inout2);
    my @second_half = ($inout3, $inout4, $inout5);
    my @all_blocks  = (@first_half, @second_half);

    # Emit one instruction per listed block, in the order given, all with
    # the same round-key operand.
    my $emit = sub {
        my ($mnemonic, $roundkey, @targets) = @_;
        foreach my $blk (@targets) {
            eval "&${mnemonic} ($blk,$roundkey)";
        }
    };

    &function_begin_B($fn_name);
    # NOTE(review): the _enter label is declared via static_label,
    # presumably so code outside this routine can branch to it -- confirm.
    &static_label($enter_label);

    &$movekey ($rndkey0, &QWP(0,$key));
    &shl      ($rounds, 4);
    &$movekey ($rndkey1, &QWP(16,$key));
    &xorps    ($inout0, $rndkey0);
    &pxor     ($inout1, $rndkey0);          # pxor does better here
    &pxor     ($inout2, $rndkey0);
    $emit->($round_op, $rndkey1, $inout0);
    &pxor     ($inout3, $rndkey0);
    &pxor     ($inout4, $rndkey0);
    $emit->($round_op, $rndkey1, $inout1);
    &lea      ($key, &DWP(32,$key,$rounds));
    &neg      ($rounds);
    $emit->($round_op, $rndkey1, $inout2);
    &pxor     ($inout5, $rndkey0);
    &$movekey ($rndkey0, &QWP(0,$key,$rounds));
    &add      ($rounds, 16);
    # Blocks 0..2 already had their $rndkey1 round above, so skip the
    # first half of the loop body on entry.
    &jmp      (&label($inner_label));

    &set_label($loop_label, 16);
    $emit->($round_op, $rndkey1, @first_half);
    &set_label($inner_label);
    $emit->($round_op, $rndkey1, @second_half);
    &set_label($enter_label);
    &$movekey ($rndkey1, &QWP(0,$key,$rounds));
    &add      ($rounds, 32);
    $emit->($round_op, $rndkey0, @all_blocks);
    &$movekey ($rndkey0, &QWP(-16,$key,$rounds));
    # The branch consumes the flags produced by the add above.
    &jnz      (&label($loop_label));

    # Tail: one more regular round with $rndkey1, then the final round
    # with $rndkey0.
    $emit->($round_op, $rndkey1, @all_blocks);
    $emit->($final_op, $rndkey0, @all_blocks);

    &ret();
    &function_end_B($fn_name);
}

# Instantiate the "enc" flavour of each N-block helper, but only when
# $PREFIX equals $AESNI_PREFIX.
&aesni_generate2("enc") if ($PREFIX eq $AESNI_PREFIX);
&aesni_generate3("enc") if ($PREFIX eq $AESNI_PREFIX);
&aesni_generate4("enc") if ($PREFIX eq $AESNI_PREFIX);
&aesni_generate6("enc") if ($PREFIX eq $AESNI_PREFIX);