# This file is generated from a similarly-named Perl script in the BoringSSL # source tree. Do not edit by hand. #if defined(__has_feature) #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) #define OPENSSL_NO_ASM #endif #endif #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) .text .globl _GFp_aes_hw_encrypt .private_extern _GFp_aes_hw_encrypt .p2align 4 _GFp_aes_hw_encrypt: movups (%rdi),%xmm2 movl 240(%rdx),%eax movups (%rdx),%xmm0 movups 16(%rdx),%xmm1 leaq 32(%rdx),%rdx xorps %xmm0,%xmm2 L$oop_enc1_1: .byte 102,15,56,220,209 decl %eax movups (%rdx),%xmm1 leaq 16(%rdx),%rdx jnz L$oop_enc1_1 .byte 102,15,56,221,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 .byte 0xf3,0xc3 .p2align 4 _aesni_encrypt2: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 movups 32(%rcx),%xmm0 leaq 32(%rcx,%rax,1),%rcx negq %rax addq $16,%rax L$enc_loop2: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%rcx,%rax,1),%xmm0 jnz L$enc_loop2 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 0xf3,0xc3 .p2align 4 _aesni_encrypt3: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 movups 32(%rcx),%xmm0 leaq 32(%rcx,%rax,1),%rcx negq %rax addq $16,%rax L$enc_loop3: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 movups -16(%rcx,%rax,1),%xmm0 jnz L$enc_loop3 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 0xf3,0xc3 .p2align 4 _aesni_encrypt4: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 xorps %xmm0,%xmm5 movups 32(%rcx),%xmm0 leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 0x0f,0x1f,0x00 addq $16,%rax L$enc_loop4: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups -16(%rcx,%rax,1),%xmm0 jnz L$enc_loop4 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 102,15,56,221,232 .byte 0xf3,0xc3 .p2align 4 _aesni_encrypt6: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 .byte 102,15,56,220,209 leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,220,217 pxor %xmm0,%xmm5 pxor %xmm0,%xmm6 .byte 102,15,56,220,225 pxor %xmm0,%xmm7 movups (%rcx,%rax,1),%xmm0 addq $16,%rax jmp L$enc_loop6_enter .p2align 4 L$enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 L$enc_loop6_enter: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups -16(%rcx,%rax,1),%xmm0 jnz L$enc_loop6 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 102,15,56,221,232 .byte 102,15,56,221,240 .byte 102,15,56,221,248 .byte 0xf3,0xc3 .p2align 4 _aesni_encrypt8: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 pxor %xmm0,%xmm6 leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,220,209 pxor %xmm0,%xmm7 pxor %xmm0,%xmm8 .byte 102,15,56,220,217 pxor %xmm0,%xmm9 movups (%rcx,%rax,1),%xmm0 addq $16,%rax jmp L$enc_loop8_inner .p2align 4 L$enc_loop8: .byte 102,15,56,220,209 .byte 102,15,56,220,217 L$enc_loop8_inner: .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 L$enc_loop8_enter: movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups -16(%rcx,%rax,1),%xmm0 jnz L$enc_loop8 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 102,15,56,221,232 .byte 102,15,56,221,240 .byte 102,15,56,221,248 .byte 102,68,15,56,221,192 .byte 102,68,15,56,221,200 .byte 0xf3,0xc3 .globl _GFp_aes_hw_ctr32_encrypt_blocks .private_extern _GFp_aes_hw_ctr32_encrypt_blocks .p2align 4 _GFp_aes_hw_ctr32_encrypt_blocks: cmpq $1,%rdx jne L$ctr32_bulk movups (%r8),%xmm2 movups (%rdi),%xmm3 movl 240(%rcx),%edx movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 L$oop_enc1_2: .byte 102,15,56,220,209 decl %edx movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_enc1_2 .byte 102,15,56,221,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 xorps %xmm3,%xmm2 pxor %xmm3,%xmm3 movups %xmm2,(%rsi) xorps %xmm2,%xmm2 jmp L$ctr32_epilogue .p2align 4 L$ctr32_bulk: leaq (%rsp),%r11 pushq %rbp subq $128,%rsp andq $-16,%rsp movdqu (%r8),%xmm2 movdqu (%rcx),%xmm0 movl 12(%r8),%r8d pxor %xmm0,%xmm2 movl 12(%rcx),%ebp movdqa %xmm2,0(%rsp) bswapl %r8d movdqa %xmm2,%xmm3 movdqa %xmm2,%xmm4 movdqa %xmm2,%xmm5 movdqa %xmm2,64(%rsp) movdqa %xmm2,80(%rsp) movdqa %xmm2,96(%rsp) movq %rdx,%r10 movdqa %xmm2,112(%rsp) leaq 1(%r8),%rax leaq 2(%r8),%rdx bswapl %eax bswapl %edx xorl %ebp,%eax xorl %ebp,%edx .byte 102,15,58,34,216,3 leaq 3(%r8),%rax movdqa %xmm3,16(%rsp) .byte 102,15,58,34,226,3 bswapl %eax movq %r10,%rdx leaq 4(%r8),%r10 movdqa %xmm4,32(%rsp) xorl %ebp,%eax bswapl %r10d .byte 102,15,58,34,232,3 xorl %ebp,%r10d movdqa %xmm5,48(%rsp) leaq 5(%r8),%r9 movl %r10d,64+12(%rsp) bswapl %r9d leaq 6(%r8),%r10 movl 240(%rcx),%eax xorl %ebp,%r9d bswapl %r10d movl %r9d,80+12(%rsp) xorl %ebp,%r10d leaq 7(%r8),%r9 movl %r10d,96+12(%rsp) bswapl %r9d leaq _GFp_ia32cap_P(%rip),%r10 movl 4(%r10),%r10d xorl %ebp,%r9d andl $71303168,%r10d movl %r9d,112+12(%rsp) movups 16(%rcx),%xmm1 movdqa 64(%rsp),%xmm6 movdqa 80(%rsp),%xmm7 cmpq $8,%rdx jb L$ctr32_tail subq $6,%rdx cmpl $4194304,%r10d je L$ctr32_6x leaq 128(%rcx),%rcx subq $2,%rdx jmp L$ctr32_loop8 .p2align 4 L$ctr32_6x: shll $4,%eax movl $48,%r10d bswapl %ebp leaq 32(%rcx,%rax,1),%rcx subq %rax,%r10 jmp L$ctr32_loop6 .p2align 4 L$ctr32_loop6: addl $6,%r8d movups -48(%rcx,%r10,1),%xmm0 .byte 102,15,56,220,209 movl %r8d,%eax xorl %ebp,%eax .byte 102,15,56,220,217 .byte 0x0f,0x38,0xf1,0x44,0x24,12 leal 1(%r8),%eax .byte 102,15,56,220,225 xorl %ebp,%eax .byte 0x0f,0x38,0xf1,0x44,0x24,28 .byte 102,15,56,220,233 leal 2(%r8),%eax xorl %ebp,%eax .byte 102,15,56,220,241 .byte 0x0f,0x38,0xf1,0x44,0x24,44 leal 3(%r8),%eax .byte 102,15,56,220,249 movups -32(%rcx,%r10,1),%xmm1 xorl %ebp,%eax .byte 102,15,56,220,208 .byte 0x0f,0x38,0xf1,0x44,0x24,60 leal 4(%r8),%eax .byte 102,15,56,220,216 xorl %ebp,%eax .byte 0x0f,0x38,0xf1,0x44,0x24,76 .byte 102,15,56,220,224 leal 5(%r8),%eax xorl %ebp,%eax .byte 102,15,56,220,232 .byte 0x0f,0x38,0xf1,0x44,0x24,92 movq %r10,%rax .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups -16(%rcx,%r10,1),%xmm0 call L$enc_loop6 movdqu (%rdi),%xmm8 movdqu 16(%rdi),%xmm9 movdqu 32(%rdi),%xmm10 movdqu 48(%rdi),%xmm11 movdqu 64(%rdi),%xmm12 movdqu 80(%rdi),%xmm13 leaq 96(%rdi),%rdi movups -64(%rcx,%r10,1),%xmm1 pxor %xmm2,%xmm8 movaps 0(%rsp),%xmm2 pxor %xmm3,%xmm9 movaps 16(%rsp),%xmm3 pxor %xmm4,%xmm10 movaps 32(%rsp),%xmm4 pxor %xmm5,%xmm11 movaps 48(%rsp),%xmm5 pxor %xmm6,%xmm12 movaps 64(%rsp),%xmm6 pxor %xmm7,%xmm13 movaps 80(%rsp),%xmm7 movdqu %xmm8,(%rsi) movdqu %xmm9,16(%rsi) movdqu %xmm10,32(%rsi) movdqu %xmm11,48(%rsi) movdqu %xmm12,64(%rsi) movdqu %xmm13,80(%rsi) leaq 96(%rsi),%rsi subq $6,%rdx jnc L$ctr32_loop6 addq $6,%rdx jz L$ctr32_done leal -48(%r10),%eax leaq -80(%rcx,%r10,1),%rcx negl %eax shrl $4,%eax jmp L$ctr32_tail .p2align 5 L$ctr32_loop8: addl $8,%r8d movdqa 96(%rsp),%xmm8 .byte 102,15,56,220,209 movl %r8d,%r9d movdqa 112(%rsp),%xmm9 .byte 102,15,56,220,217 bswapl %r9d movups 32-128(%rcx),%xmm0 .byte 102,15,56,220,225 xorl %ebp,%r9d nop .byte 102,15,56,220,233 movl %r9d,0+12(%rsp) leaq 1(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 48-128(%rcx),%xmm1 bswapl %r9d .byte 102,15,56,220,208 .byte 102,15,56,220,216 xorl %ebp,%r9d .byte 0x66,0x90 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movl %r9d,16+12(%rsp) leaq 2(%r8),%r9 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 64-128(%rcx),%xmm0 bswapl %r9d .byte 102,15,56,220,209 .byte 102,15,56,220,217 xorl %ebp,%r9d .byte 0x66,0x90 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movl %r9d,32+12(%rsp) leaq 3(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 80-128(%rcx),%xmm1 bswapl %r9d .byte 102,15,56,220,208 .byte 102,15,56,220,216 xorl %ebp,%r9d .byte 0x66,0x90 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movl %r9d,48+12(%rsp) leaq 4(%r8),%r9 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 96-128(%rcx),%xmm0 bswapl %r9d .byte 102,15,56,220,209 .byte 102,15,56,220,217 xorl %ebp,%r9d .byte 0x66,0x90 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movl %r9d,64+12(%rsp) leaq 5(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 112-128(%rcx),%xmm1 bswapl %r9d .byte 102,15,56,220,208 .byte 102,15,56,220,216 xorl %ebp,%r9d .byte 0x66,0x90 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movl %r9d,80+12(%rsp) leaq 6(%r8),%r9 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 128-128(%rcx),%xmm0 bswapl %r9d .byte 102,15,56,220,209 .byte 102,15,56,220,217 xorl %ebp,%r9d .byte 0x66,0x90 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movl %r9d,96+12(%rsp) leaq 7(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 144-128(%rcx),%xmm1 bswapl %r9d .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 xorl %ebp,%r9d movdqu 0(%rdi),%xmm10 .byte 102,15,56,220,232 movl %r9d,112+12(%rsp) cmpl $11,%eax .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 160-128(%rcx),%xmm0 jb L$ctr32_enc_done .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 176-128(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 192-128(%rcx),%xmm0 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 208-128(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 224-128(%rcx),%xmm0 jmp L$ctr32_enc_done .p2align 4 L$ctr32_enc_done: movdqu 16(%rdi),%xmm11 pxor %xmm0,%xmm10 movdqu 32(%rdi),%xmm12 pxor %xmm0,%xmm11 movdqu 48(%rdi),%xmm13 pxor %xmm0,%xmm12 movdqu 64(%rdi),%xmm14 pxor %xmm0,%xmm13 movdqu 80(%rdi),%xmm15 pxor %xmm0,%xmm14 pxor %xmm0,%xmm15 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movdqu 96(%rdi),%xmm1 leaq 128(%rdi),%rdi .byte 102,65,15,56,221,210 pxor %xmm0,%xmm1 movdqu 112-128(%rdi),%xmm10 .byte 102,65,15,56,221,219 pxor %xmm0,%xmm10 movdqa 0(%rsp),%xmm11 .byte 102,65,15,56,221,228 .byte 102,65,15,56,221,237 movdqa 16(%rsp),%xmm12 movdqa 32(%rsp),%xmm13 .byte 102,65,15,56,221,246 .byte 102,65,15,56,221,255 movdqa 48(%rsp),%xmm14 movdqa 64(%rsp),%xmm15 .byte 102,68,15,56,221,193 movdqa 80(%rsp),%xmm0 movups 16-128(%rcx),%xmm1 .byte 102,69,15,56,221,202 movups %xmm2,(%rsi) movdqa %xmm11,%xmm2 movups %xmm3,16(%rsi) movdqa %xmm12,%xmm3 movups %xmm4,32(%rsi) movdqa %xmm13,%xmm4 movups %xmm5,48(%rsi) movdqa %xmm14,%xmm5 movups %xmm6,64(%rsi) movdqa %xmm15,%xmm6 movups %xmm7,80(%rsi) movdqa %xmm0,%xmm7 movups %xmm8,96(%rsi) movups %xmm9,112(%rsi) leaq 128(%rsi),%rsi subq $8,%rdx jnc L$ctr32_loop8 addq $8,%rdx jz L$ctr32_done leaq -128(%rcx),%rcx L$ctr32_tail: leaq 16(%rcx),%rcx cmpq $4,%rdx jb L$ctr32_loop3 je L$ctr32_loop4 shll $4,%eax movdqa 96(%rsp),%xmm8 pxor %xmm9,%xmm9 movups 16(%rcx),%xmm0 .byte 102,15,56,220,209 .byte 102,15,56,220,217 leaq 32-16(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,220,225 addq $16,%rax movups (%rdi),%xmm10 .byte 102,15,56,220,233 .byte 102,15,56,220,241 movups 16(%rdi),%xmm11 movups 32(%rdi),%xmm12 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 call L$enc_loop8_enter movdqu 48(%rdi),%xmm13 pxor %xmm10,%xmm2 movdqu 64(%rdi),%xmm10 pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) pxor %xmm10,%xmm6 movdqu %xmm5,48(%rsi) movdqu %xmm6,64(%rsi) cmpq $6,%rdx jb L$ctr32_done movups 80(%rdi),%xmm11 xorps %xmm11,%xmm7 movups %xmm7,80(%rsi) je L$ctr32_done movups 96(%rdi),%xmm12 xorps %xmm12,%xmm8 movups %xmm8,96(%rsi) jmp L$ctr32_done .p2align 5 L$ctr32_loop4: .byte 102,15,56,220,209 leaq 16(%rcx),%rcx decl %eax .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups (%rcx),%xmm1 jnz L$ctr32_loop4 .byte 102,15,56,221,209 .byte 102,15,56,221,217 movups (%rdi),%xmm10 movups 16(%rdi),%xmm11 .byte 102,15,56,221,225 .byte 102,15,56,221,233 movups 32(%rdi),%xmm12 movups 48(%rdi),%xmm13 xorps %xmm10,%xmm2 movups %xmm2,(%rsi) xorps %xmm11,%xmm3 movups %xmm3,16(%rsi) pxor %xmm12,%xmm4 movdqu %xmm4,32(%rsi) pxor %xmm13,%xmm5 movdqu %xmm5,48(%rsi) jmp L$ctr32_done .p2align 5 L$ctr32_loop3: .byte 102,15,56,220,209 leaq 16(%rcx),%rcx decl %eax .byte 102,15,56,220,217 .byte 102,15,56,220,225 movups (%rcx),%xmm1 jnz L$ctr32_loop3 .byte 102,15,56,221,209 .byte 102,15,56,221,217 .byte 102,15,56,221,225 movups (%rdi),%xmm10 xorps %xmm10,%xmm2 movups %xmm2,(%rsi) cmpq $2,%rdx jb L$ctr32_done movups 16(%rdi),%xmm11 xorps %xmm11,%xmm3 movups %xmm3,16(%rsi) je L$ctr32_done movups 32(%rdi),%xmm12 xorps %xmm12,%xmm4 movups %xmm4,32(%rsi) L$ctr32_done: xorps %xmm0,%xmm0 xorl %ebp,%ebp pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 movaps %xmm0,0(%rsp) pxor %xmm8,%xmm8 movaps %xmm0,16(%rsp) pxor %xmm9,%xmm9 movaps %xmm0,32(%rsp) pxor %xmm10,%xmm10 movaps %xmm0,48(%rsp) pxor %xmm11,%xmm11 movaps %xmm0,64(%rsp) pxor %xmm12,%xmm12 movaps %xmm0,80(%rsp) pxor %xmm13,%xmm13 movaps %xmm0,96(%rsp) pxor %xmm14,%xmm14 movaps %xmm0,112(%rsp) pxor %xmm15,%xmm15 movq -8(%r11),%rbp leaq (%r11),%rsp L$ctr32_epilogue: .byte 0xf3,0xc3 .globl _GFp_aes_hw_set_encrypt_key .private_extern _GFp_aes_hw_set_encrypt_key .p2align 4 _GFp_aes_hw_set_encrypt_key: __aesni_set_encrypt_key: .byte 0x48,0x83,0xEC,0x08 movq $-1,%rax testq %rdi,%rdi jz L$enc_key_ret testq %rdx,%rdx jz L$enc_key_ret movups (%rdi),%xmm0 xorps %xmm4,%xmm4 leaq _GFp_ia32cap_P(%rip),%r10 movl 4(%r10),%r10d andl $268437504,%r10d leaq 16(%rdx),%rax cmpl $256,%esi je L$14rounds cmpl $128,%esi jne L$bad_keybits L$10rounds: movl $9,%esi cmpl $268435456,%r10d je L$10rounds_alt movups %xmm0,(%rdx) .byte 102,15,58,223,200,1 call L$key_expansion_128_cold .byte 102,15,58,223,200,2 call L$key_expansion_128 .byte 102,15,58,223,200,4 call L$key_expansion_128 .byte 102,15,58,223,200,8 call L$key_expansion_128 .byte 102,15,58,223,200,16 call L$key_expansion_128 .byte 102,15,58,223,200,32 call L$key_expansion_128 .byte 102,15,58,223,200,64 call L$key_expansion_128 .byte 102,15,58,223,200,128 call L$key_expansion_128 .byte 102,15,58,223,200,27 call L$key_expansion_128 .byte 102,15,58,223,200,54 call L$key_expansion_128 movups %xmm0,(%rax) movl %esi,80(%rax) xorl %eax,%eax jmp L$enc_key_ret .p2align 4 L$10rounds_alt: movdqa L$key_rotate(%rip),%xmm5 movl $8,%r10d movdqa L$key_rcon1(%rip),%xmm4 movdqa %xmm0,%xmm2 movdqu %xmm0,(%rdx) jmp L$oop_key128 .p2align 4 L$oop_key128: .byte 102,15,56,0,197 .byte 102,15,56,221,196 pslld $1,%xmm4 leaq 16(%rax),%rax movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,-16(%rax) movdqa %xmm0,%xmm2 decl %r10d jnz L$oop_key128 movdqa L$key_rcon1b(%rip),%xmm4 .byte 102,15,56,0,197 .byte 102,15,56,221,196 pslld $1,%xmm4 movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,(%rax) movdqa %xmm0,%xmm2 .byte 102,15,56,0,197 .byte 102,15,56,221,196 movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,16(%rax) movl %esi,96(%rax) xorl %eax,%eax jmp L$enc_key_ret .p2align 4 L$14rounds: movups 16(%rdi),%xmm2 movl $13,%esi leaq 16(%rax),%rax cmpl $268435456,%r10d je L$14rounds_alt movups %xmm0,(%rdx) movups %xmm2,16(%rdx) .byte 102,15,58,223,202,1 call L$key_expansion_256a_cold .byte 102,15,58,223,200,1 call L$key_expansion_256b .byte 102,15,58,223,202,2 call L$key_expansion_256a .byte 102,15,58,223,200,2 call L$key_expansion_256b .byte 102,15,58,223,202,4 call L$key_expansion_256a .byte 102,15,58,223,200,4 call L$key_expansion_256b .byte 102,15,58,223,202,8 call L$key_expansion_256a .byte 102,15,58,223,200,8 call L$key_expansion_256b .byte 102,15,58,223,202,16 call L$key_expansion_256a .byte 102,15,58,223,200,16 call L$key_expansion_256b .byte 102,15,58,223,202,32 call L$key_expansion_256a .byte 102,15,58,223,200,32 call L$key_expansion_256b .byte 102,15,58,223,202,64 call L$key_expansion_256a movups %xmm0,(%rax) movl %esi,16(%rax) xorq %rax,%rax jmp L$enc_key_ret .p2align 4 L$14rounds_alt: movdqa L$key_rotate(%rip),%xmm5 movdqa L$key_rcon1(%rip),%xmm4 movl $7,%r10d movdqu %xmm0,0(%rdx) movdqa %xmm2,%xmm1 movdqu %xmm2,16(%rdx) jmp L$oop_key256 .p2align 4 L$oop_key256: .byte 102,15,56,0,213 .byte 102,15,56,221,212 movdqa %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm3,%xmm0 pslld $1,%xmm4 pxor %xmm2,%xmm0 movdqu %xmm0,(%rax) decl %r10d jz L$done_key256 pshufd $0xff,%xmm0,%xmm2 pxor %xmm3,%xmm3 .byte 102,15,56,221,211 movdqa %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm3,%xmm1 pxor %xmm1,%xmm2 movdqu %xmm2,16(%rax) leaq 32(%rax),%rax movdqa %xmm2,%xmm1 jmp L$oop_key256 L$done_key256: movl %esi,16(%rax) xorl %eax,%eax jmp L$enc_key_ret .p2align 4 L$bad_keybits: movq $-2,%rax L$enc_key_ret: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 addq $8,%rsp .byte 0xf3,0xc3 L$SEH_end_GFp_set_encrypt_key: .p2align 4 L$key_expansion_128: movups %xmm0,(%rax) leaq 16(%rax),%rax L$key_expansion_128_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $255,%xmm1,%xmm1 xorps %xmm1,%xmm0 .byte 0xf3,0xc3 .p2align 4 L$key_expansion_192a: movups %xmm0,(%rax) leaq 16(%rax),%rax L$key_expansion_192a_cold: movaps %xmm2,%xmm5 L$key_expansion_192b_warm: shufps $16,%xmm0,%xmm4 movdqa %xmm2,%xmm3 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 pslldq $4,%xmm3 xorps %xmm4,%xmm0 pshufd $85,%xmm1,%xmm1 pxor %xmm3,%xmm2 pxor %xmm1,%xmm0 pshufd $255,%xmm0,%xmm3 pxor %xmm3,%xmm2 .byte 0xf3,0xc3 .p2align 4 L$key_expansion_192b: movaps %xmm0,%xmm3 shufps $68,%xmm0,%xmm5 movups %xmm5,(%rax) shufps $78,%xmm2,%xmm3 movups %xmm3,16(%rax) leaq 32(%rax),%rax jmp L$key_expansion_192b_warm .p2align 4 L$key_expansion_256a: movups %xmm2,(%rax) leaq 16(%rax),%rax L$key_expansion_256a_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $255,%xmm1,%xmm1 xorps %xmm1,%xmm0 .byte 0xf3,0xc3 .p2align 4 L$key_expansion_256b: movups %xmm0,(%rax) leaq 16(%rax),%rax shufps $16,%xmm2,%xmm4 xorps %xmm4,%xmm2 shufps $140,%xmm2,%xmm4 xorps %xmm4,%xmm2 shufps $170,%xmm1,%xmm1 xorps %xmm1,%xmm2 .byte 0xf3,0xc3 .p2align 6 L$bswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 L$increment32: .long 6,6,6,0 L$increment64: .long 1,0,0,0 L$increment1: .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 L$key_rotate: .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d L$key_rotate192: .long 0x04070605,0x04070605,0x04070605,0x04070605 L$key_rcon1: .long 1,1,1,1 L$key_rcon1b: .long 0x1b,0x1b,0x1b,0x1b .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 #endif