mm4");
	&shufps		("xmm4","xmm0",0b10001100);
	&xorps		("xmm0","xmm4");
	&shufps		("xmm1","xmm1",0b11111111);	# critical path
	&xorps		("xmm0","xmm1");
	&ret();

&set_label("key_256b",16);
	&$movekey	(&QWP(0,$key),"xmm0");		# write xmm0 to the current slot
	&lea		($key,&DWP(16,$key));		# step $key to the next 16-byte slot

	&shufps		("xmm4","xmm2",0b00010000);
	&xorps		("xmm2","xmm4");
	&shufps		("xmm4","xmm2",0b10001100);
	&xorps		("xmm2","xmm4");
	&shufps		("xmm1","xmm1",0b10101010);	# critical path
	&xorps		("xmm2","xmm1");
	&ret();

# Success exit: xmm0-xmm5 are cleared (presumably so no key-derived data
# stays in registers), eax = 0, ebx is popped back.
&set_label("good_key");
	&pxor	("xmm0","xmm0");
	&pxor	("xmm1","xmm1");
	&pxor	("xmm2","xmm2");
	&pxor	("xmm3","xmm3");
	&pxor	("xmm4","xmm4");
	&pxor	("xmm5","xmm5");
	&xor	("eax","eax");
	&pop	("ebx");
	&ret	();

# Failure exit: eax = -2, ebx is popped back.
&set_label("bad_keybits",4);
	&pxor	("xmm0","xmm0");
	&mov	("eax",-2);
	&pop	("ebx");
	&ret	();
&function_end_B("${PREFIX}_set_encrypt_key_base");

# int $PREFIX_set_encrypt_key_alt (const unsigned char *userKey, int bits,
#                                  AES_KEY *key)
#
# Entry sequence: fetch the three stack arguments, save ebx, point ebx at
# key_const, load the first 16 key bytes, and branch on the key size.
# Returns 0 in eax on success and -2 for an unsupported bit count.
&function_begin_B("${PREFIX}_set_encrypt_key_alt");
	&record_function_hit(3);

	&mov	("eax",&wparam(0));		# eax     = userKey
	&mov	($rounds,&wparam(1));		# $rounds = bits
	&mov	($key,&wparam(2));		# $key    = key
	&push	("ebx");			# popped again at good_key/bad_keybits

	# Position-independent address of key_const: the call pushes the
	# address of label "pic", which is popped into ebx and then offset by
	# (key_const - pic).  NOTE(review): blindpop presumably pops without
	# perlasm's stack bookkeeping -- confirm in x86asm.pl.
	&call	(&label("pic"));
&set_label("pic");
	&blindpop("ebx");
	&lea	("ebx",&DWP(&label("key_const")."-".&label("pic"),"ebx"));

	&movups	("xmm0",&QWP(0,"eax"));	# pull first 128 bits of *userKey
	&xorps	("xmm4","xmm4");	# low dword of xmm4 is assumed 0
	&lea	($key,&DWP(16,$key));	# $key now points 16 bytes past the start
	&cmp	($rounds,256);
	&je	(&label("14rounds_alt"));
	# 192-bit key support was removed.
# Any bit count other than 128 reaching this point is rejected.
	&cmp	($rounds,128);
	&jne	(&label("bad_keybits"));

# 128-bit schedule.  Register roles in this path:
#   xmm0  newest round key (also the input to the pshufb/aesenclast step)
#   xmm2  copy of the previous round key, folded in place
#   xmm3  scratch for the fold
#   xmm4  round constant, one copy per dword
#   xmm5  pshufb mask from key_const+0x00
#
# Generator-time helper; emits exactly the seven instructions
#   xmm2 ^= (xmm2 << 32) ^ (xmm2 << 64) ^ (xmm2 << 96)
# so that dword i of xmm2 becomes the XOR of dwords 0..i.  xmm3 is clobbered.
my $fold_words_128 = sub {
	&movdqa		("xmm3","xmm2");
	&pslldq		("xmm2",4);
	&pxor		("xmm3","xmm2");
	&pslldq		("xmm2",4);
	&pxor		("xmm3","xmm2");
	&pslldq		("xmm2",4);
	&pxor		("xmm2","xmm3");
};

&set_label("10rounds_alt",16);
	&movdqa		("xmm5",&QWP(0x00,"ebx"));	# byte-shuffle mask
	&mov		($rounds,8);			# loop_key128 runs 8 times
	&movdqa		("xmm4",&QWP(0x20,"ebx"));	# round constant row {1,1,1,1}
	&movdqa		("xmm2","xmm0");
	&movdqu		(&QWP(-16,$key),"xmm0");	# first slot = the user key itself

&set_label("loop_key128");
	# The mask (0x0c0f0e0d per dword) copies source bytes 13,14,15,12 into
	# every dword: the last key word rotated by one byte, replicated.
	# With four identical columns, aesenclast reduces to the S-box
	# substitution followed by XOR with the round constant in xmm4.
	&pshufb		("xmm0","xmm5");
	&aesenclast	("xmm0","xmm4");
	&pslld		("xmm4",1);			# double the round constant
	&lea		($key,&DWP(16,$key));

	$fold_words_128->();

	&pxor		("xmm0","xmm2");		# next round key
	&movdqu		(&QWP(-16,$key),"xmm0");
	&movdqa		("xmm2","xmm0");

	&dec		($rounds);
	&jnz		(&label("loop_key128"));

	# Two more round keys after the loop.  The constant is reloaded from
	# the {0x1b,...} row and doubled once more between them.
	&movdqa		("xmm4",&QWP(0x30,"ebx"));

	&pshufb		("xmm0","xmm5");
	&aesenclast	("xmm0","xmm4");
	&pslld		("xmm4",1);

	$fold_words_128->();

	&pxor		("xmm0","xmm2");
	&movdqu		(&QWP(0,$key),"xmm0");

	&movdqa		("xmm2","xmm0");
	&pshufb		("xmm0","xmm5");
	&aesenclast	("xmm0","xmm4");

	$fold_words_128->();

	&pxor		("xmm0","xmm2");
	&movdqu		(&QWP(16,$key),"xmm0");

	# Store 9 in the dword at 96($key) -- presumably the rounds field of
	# AES_KEY; confirm against the struct layout.
	&mov		($rounds,9);
	&mov		(&DWP(96,$key),$rounds);

	&jmp	(&label("good_key"));

# 192-bit key support was removed.
# 256-bit schedule.  Register roles in this path:
#   xmm0  even-numbered round key (first key half on entry)
#   xmm1  odd-numbered round key  (second key half on entry)
#   xmm2  substituted word, then the freshly derived odd key
#   xmm3  scratch
#   xmm4  round constant, one copy per dword
#   xmm5  pshufb mask from key_const+0x00
#
# Generator-time helper; for the register named in its argument it emits
# exactly the seven instructions
#   reg ^= (reg << 32) ^ (reg << 64) ^ (reg << 96)
# so that dword i of reg becomes the XOR of dwords 0..i.  xmm3 is clobbered.
my $fold_words_256 = sub {
	my ($reg) = @_;
	&movdqa		("xmm3",$reg);
	&pslldq		($reg,4);
	&pxor		("xmm3",$reg);
	&pslldq		($reg,4);
	&pxor		("xmm3",$reg);
	&pslldq		($reg,4);
	&pxor		($reg,"xmm3");
};

&set_label("14rounds_alt",16);
	&movups		("xmm2",&QWP(16,"eax"));	# remaining half of *userKey
	&lea		($key,&DWP(16,$key));
	&movdqa		("xmm5",&QWP(0x00,"ebx"));	# byte-shuffle mask
	&movdqa		("xmm4",&QWP(0x20,"ebx"));	# round constant row {1,1,1,1}
	&mov		($rounds,7);			# seven even-key derivations
	&movdqu		(&QWP(-32,$key),"xmm0");	# first two slots = the user key
	&movdqa		("xmm1","xmm2");
	&movdqu		(&QWP(-16,$key),"xmm2");

&set_label("loop_key256");
	# Even key: rotate+replicate the last word of the previous odd key via
	# the mask, S-box it and add the round constant (aesenclast on four
	# identical columns), then combine with the folded previous even key.
	&pshufb		("xmm2","xmm5");
	&aesenclast	("xmm2","xmm4");

	$fold_words_256->("xmm0");
	&pslld		("xmm4",1);			# double the round constant

	&pxor		("xmm0","xmm2");
	&movdqu		(&QWP(0,$key),"xmm0");

	&dec		($rounds);
	&jz		(&label("done_key256"));	# the last pass yields no odd key

	# Odd key: replicate the top dword of the new even key and S-box it
	# with a zero "round key" (no rotation, no constant), then combine
	# with the folded previous odd key.
	&pshufd		("xmm2","xmm0",0xff);
	&pxor		("xmm3","xmm3");
	&aesenclast	("xmm2","xmm3");

	$fold_words_256->("xmm1");

	&pxor		("xmm2","xmm1");
	&movdqu		(&QWP(16,$key),"xmm2");
	&lea		($key,&DWP(32,$key));
	&movdqa		("xmm1","xmm2");
	&jmp		(&label("loop_key256"));

&set_label("done_key256");
	# Store 13 in the dword at 16($key) -- presumably the rounds field of
	# AES_KEY; confirm against the struct layout.
	&mov		($rounds,13);
	&mov		(&DWP(16,$key),$rounds);

# Success exit (the 128-bit path jumps here as well): clear xmm0-xmm5,
# return 0 in eax, restore ebx.
&set_label("good_key");
	for my $i (0..5) {
		&pxor	("xmm$i","xmm$i");
	}
	&xor	("eax","eax");
	&pop	("ebx");
	&ret	();

# Failure exit for an unsupported bit count: return -2 in eax, restore ebx.
&set_label("bad_keybits",4);
	&pxor	("xmm0","xmm0");
	&mov	("eax",-2);
	&pop	("ebx");
	&ret	();
&function_end_B("${PREFIX}_set_encrypt_key_alt");

# Constant table reached through ebx by the _alt key schedule; each row is
# one dword repeated four times (16 bytes).
&set_label("key_const",64);
	&data_word((0x0c0f0e0d) x 4);	# +0x00: pshufb mask used by the _alt schedules
	&data_word((0x04070605) x 4);	# +0x10: second shuffle pattern (not loaded by the _alt function)
	&data_word((1) x 4);		# +0x20: initial round constant
	&data_word((0x1b) x 4);		# +0x30: round constant reloaded by the 128-bit path
&asciz("AES for Intel AES-NI, CRYPTOGAMS by ");

&asm_finish();

close STDOUT or die "error closing STDOUT: $!";