author | Robert Sloan <varomodt@google.com> | 2019-03-01 15:53:37 -0800
committer | Robert Sloan <varomodt@google.com> | 2019-03-01 16:02:19 -0800
commit | 4c22c5fad19b2a554bcb056ca25ca4cc2ef6a45c (patch)
tree | fb0dc666dadcc955218e7a701ea482a9270d1a53 /linux-x86_64
parent | b6b07c32187eec60f4c9f27e0959c58d145f87ef (diff)
download | boringssl-4c22c5fad19b2a554bcb056ca25ca4cc2ef6a45c.tar.gz
external/boringssl: Sync to c3889634a1aa52575c5d26497696238208fbd0f5.
This includes the following changes:
https://boringssl.googlesource.com/boringssl/+log/41c10e2b5f37edce8b9f292f7f3bacb7e30e25c4..c3889634a1aa52575c5d26497696238208fbd0f5
Test: atest CtsLibcoreTestCases
Change-Id: Ia1c2941ccf58a9e0d736b3409a2d13c21603a205
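For reference, the upstream commit range cited in the message above can also be reviewed from a local BoringSSL checkout instead of the web log. This is a minimal sketch, assuming a checkout that already contains both upstream commits:

    # Show the upstream BoringSSL changes pulled in by this sync
    # (the same range as the googlesource.com log link above).
    git log --oneline 41c10e2b5f37edce8b9f292f7f3bacb7e30e25c4..c3889634a1aa52575c5d26497696238208fbd0f5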
Diffstat (limited to 'linux-x86_64')
-rw-r--r-- | linux-x86_64/crypto/chacha/chacha-x86_64.S | 34
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/aes-x86_64.S | 16
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S | 7
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/aesni-x86_64.S | 2054
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S | 13
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S | 426
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/ghash-x86_64.S | 12
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/md5-x86_64.S | 18
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S | 36
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S | 21
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S | 12
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/rsaz-avx2.S | 25
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S | 41
-rw-r--r-- | linux-x86_64/crypto/fipsmodule/x86_64-mont5.S | 21
-rw-r--r-- | linux-x86_64/crypto/test/trampoline-x86_64.S | 517
15 files changed, 1217 insertions, 2036 deletions
diff --git a/linux-x86_64/crypto/chacha/chacha-x86_64.S b/linux-x86_64/crypto/chacha/chacha-x86_64.S index 785b2dc4..4e2267bb 100644 --- a/linux-x86_64/crypto/chacha/chacha-x86_64.S +++ b/linux-x86_64/crypto/chacha/chacha-x86_64.S @@ -50,6 +50,7 @@ .type ChaCha20_ctr32,@function .align 64 ChaCha20_ctr32: +.cfi_startproc cmpq $0,%rdx je .Lno_data movq OPENSSL_ia32cap_P+4(%rip),%r10 @@ -57,12 +58,25 @@ ChaCha20_ctr32: jnz .LChaCha20_ssse3 pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset rbx,-16 pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset rbp,-24 pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12,-32 pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset r13,-40 pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset r14,-48 pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset r15,-56 subq $64+24,%rsp +.cfi_adjust_cfa_offset 88 .Lctr32_body: @@ -303,20 +317,30 @@ ChaCha20_ctr32: .Ldone: leaq 64+24+48(%rsp),%rsi movq -48(%rsi),%r15 +.cfi_restore r15 movq -40(%rsi),%r14 +.cfi_restore r14 movq -32(%rsi),%r13 +.cfi_restore r13 movq -24(%rsi),%r12 +.cfi_restore r12 movq -16(%rsi),%rbp +.cfi_restore rbp movq -8(%rsi),%rbx +.cfi_restore rbx leaq (%rsi),%rsp +.cfi_adjust_cfa_offset -136 .Lno_data: .byte 0xf3,0xc3 +.cfi_endproc .size ChaCha20_ctr32,.-ChaCha20_ctr32 .type ChaCha20_ssse3,@function .align 32 ChaCha20_ssse3: .LChaCha20_ssse3: +.cfi_startproc movq %rsp,%r9 +.cfi_def_cfa_register r9 cmpq $128,%rdx ja .LChaCha20_4x @@ -442,14 +466,18 @@ ChaCha20_ssse3: .Ldone_ssse3: leaq (%r9),%rsp +.cfi_def_cfa_register rsp .Lssse3_epilogue: .byte 0xf3,0xc3 +.cfi_endproc .size ChaCha20_ssse3,.-ChaCha20_ssse3 .type ChaCha20_4x,@function .align 32 ChaCha20_4x: .LChaCha20_4x: +.cfi_startproc movq %rsp,%r9 +.cfi_def_cfa_register r9 movq %r10,%r11 shrq $32,%r10 testq $32,%r10 @@ -990,14 +1018,18 @@ ChaCha20_4x: .Ldone4x: leaq (%r9),%rsp +.cfi_def_cfa_register rsp .L4x_epilogue: .byte 0xf3,0xc3 +.cfi_endproc .size ChaCha20_4x,.-ChaCha20_4x .type ChaCha20_8x,@function .align 32 ChaCha20_8x: .LChaCha20_8x: +.cfi_startproc movq %rsp,%r9 +.cfi_def_cfa_register r9 subq $0x280+8,%rsp andq $-32,%rsp vzeroupper @@ -1592,7 +1624,9 @@ ChaCha20_8x: .Ldone8x: vzeroall leaq (%r9),%rsp +.cfi_def_cfa_register rsp .L8x_epilogue: .byte 0xf3,0xc3 +.cfi_endproc .size ChaCha20_8x,.-ChaCha20_8x #endif diff --git a/linux-x86_64/crypto/fipsmodule/aes-x86_64.S b/linux-x86_64/crypto/fipsmodule/aes-x86_64.S index 0dca2617..f45e010e 100644 --- a/linux-x86_64/crypto/fipsmodule/aes-x86_64.S +++ b/linux-x86_64/crypto/fipsmodule/aes-x86_64.S @@ -168,6 +168,7 @@ _x86_64_AES_encrypt: .type _x86_64_AES_encrypt_compact,@function .align 16 _x86_64_AES_encrypt_compact: +.cfi_startproc leaq 128(%r14),%r8 movl 0-128(%r8),%edi movl 32-128(%r8),%ebp @@ -337,6 +338,7 @@ _x86_64_AES_encrypt_compact: xorl 8(%r15),%ecx xorl 12(%r15),%edx .byte 0xf3,0xc3 +.cfi_endproc .size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact .align 16 .globl aes_nohw_encrypt @@ -580,6 +582,7 @@ _x86_64_AES_decrypt: .type _x86_64_AES_decrypt_compact,@function .align 16 _x86_64_AES_decrypt_compact: +.cfi_startproc leaq 128(%r14),%r8 movl 0-128(%r8),%edi movl 32-128(%r8),%ebp @@ -801,6 +804,7 @@ _x86_64_AES_decrypt_compact: xorl 8(%r15),%ecx xorl 12(%r15),%edx .byte 0xf3,0xc3 +.cfi_endproc .size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact .align 16 .globl aes_nohw_decrypt @@ -932,6 +936,7 @@ aes_nohw_set_encrypt_key: .type _x86_64_AES_set_encrypt_key,@function .align 16 _x86_64_AES_set_encrypt_key: +.cfi_startproc movl %esi,%ecx movq %rdi,%rsi movq %rdx,%rdi @@ 
-1167,6 +1172,7 @@ _x86_64_AES_set_encrypt_key: movq $-1,%rax .Lexit: .byte 0xf3,0xc3 +.cfi_endproc .size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key .align 16 .globl aes_nohw_set_decrypt_key @@ -1390,8 +1396,9 @@ aes_nohw_cbc_encrypt: cmpq $0,%rdx je .Lcbc_epilogue pushfq + + .cfi_adjust_cfa_offset 8 -.cfi_offset 49,-16 pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset %rbx,-24 @@ -1420,6 +1427,7 @@ aes_nohw_cbc_encrypt: cmpq $0,%r9 cmoveq %r10,%r14 +.cfi_remember_state leaq OPENSSL_ia32cap_P(%rip),%r10 movl (%r10),%r10d cmpq $512,%rdx @@ -1656,6 +1664,7 @@ aes_nohw_cbc_encrypt: .align 16 .Lcbc_slow_prologue: +.cfi_restore_state leaq -88(%rsp),%rbp andq $-64,%rbp @@ -1667,8 +1676,10 @@ aes_nohw_cbc_encrypt: subq %r10,%rbp xchgq %rsp,%rbp +.cfi_def_cfa_register %rbp movq %rbp,16(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x40 .Lcbc_slow_body: @@ -1857,8 +1868,9 @@ aes_nohw_cbc_encrypt: .cfi_def_cfa %rsp,16 .Lcbc_popfq: popfq + + .cfi_adjust_cfa_offset -8 -.cfi_restore 49 .Lcbc_epilogue: .byte 0xf3,0xc3 .cfi_endproc diff --git a/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S b/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S index 066f4774..240cb5d4 100644 --- a/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S +++ b/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S @@ -556,6 +556,13 @@ _aesni_ctr32_6x: .align 32 aesni_gcm_encrypt: .cfi_startproc +#ifndef NDEBUG +#ifndef BORINGSSL_FIPS +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+2(%rip) +#endif +#endif xorq %r10,%r10 diff --git a/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S b/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S index 9ea98246..42e55307 100644 --- a/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S +++ b/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S @@ -19,6 +19,14 @@ .type aes_hw_encrypt,@function .align 16 aes_hw_encrypt: +.cfi_startproc +#ifndef NDEBUG +#ifndef BORINGSSL_FIPS +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+1(%rip) +#endif +#endif movups (%rdi),%xmm2 movl 240(%rdx),%eax movups (%rdx),%xmm0 @@ -37,6 +45,7 @@ aes_hw_encrypt: movups %xmm2,(%rsi) pxor %xmm2,%xmm2 .byte 0xf3,0xc3 +.cfi_endproc .size aes_hw_encrypt,.-aes_hw_encrypt .globl aes_hw_decrypt @@ -44,6 +53,7 @@ aes_hw_encrypt: .type aes_hw_decrypt,@function .align 16 aes_hw_decrypt: +.cfi_startproc movups (%rdi),%xmm2 movl 240(%rdx),%eax movups (%rdx),%xmm0 @@ -62,10 +72,12 @@ aes_hw_decrypt: movups %xmm2,(%rsi) pxor %xmm2,%xmm2 .byte 0xf3,0xc3 +.cfi_endproc .size aes_hw_decrypt, .-aes_hw_decrypt .type _aesni_encrypt2,@function .align 16 _aesni_encrypt2: +.cfi_startproc movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 @@ -91,10 +103,12 @@ _aesni_encrypt2: .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 0xf3,0xc3 +.cfi_endproc .size _aesni_encrypt2,.-_aesni_encrypt2 .type _aesni_decrypt2,@function .align 16 _aesni_decrypt2: +.cfi_startproc movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 @@ -120,10 +134,12 @@ _aesni_decrypt2: .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 0xf3,0xc3 +.cfi_endproc .size _aesni_decrypt2,.-_aesni_decrypt2 .type _aesni_encrypt3,@function .align 16 _aesni_encrypt3: +.cfi_startproc movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 @@ -154,10 +170,12 @@ _aesni_encrypt3: .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 0xf3,0xc3 +.cfi_endproc .size _aesni_encrypt3,.-_aesni_encrypt3 .type _aesni_decrypt3,@function .align 16 _aesni_decrypt3: +.cfi_startproc movups (%rcx),%xmm0 shll 
$4,%eax movups 16(%rcx),%xmm1 @@ -188,10 +206,12 @@ _aesni_decrypt3: .byte 102,15,56,223,216 .byte 102,15,56,223,224 .byte 0xf3,0xc3 +.cfi_endproc .size _aesni_decrypt3,.-_aesni_decrypt3 .type _aesni_encrypt4,@function .align 16 _aesni_encrypt4: +.cfi_startproc movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 @@ -228,10 +248,12 @@ _aesni_encrypt4: .byte 102,15,56,221,224 .byte 102,15,56,221,232 .byte 0xf3,0xc3 +.cfi_endproc .size _aesni_encrypt4,.-_aesni_encrypt4 .type _aesni_decrypt4,@function .align 16 _aesni_decrypt4: +.cfi_startproc movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 @@ -268,10 +290,12 @@ _aesni_decrypt4: .byte 102,15,56,223,224 .byte 102,15,56,223,232 .byte 0xf3,0xc3 +.cfi_endproc .size _aesni_decrypt4,.-_aesni_decrypt4 .type _aesni_encrypt6,@function .align 16 _aesni_encrypt6: +.cfi_startproc movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 @@ -322,10 +346,12 @@ _aesni_encrypt6: .byte 102,15,56,221,240 .byte 102,15,56,221,248 .byte 0xf3,0xc3 +.cfi_endproc .size _aesni_encrypt6,.-_aesni_encrypt6 .type _aesni_decrypt6,@function .align 16 _aesni_decrypt6: +.cfi_startproc movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 @@ -376,10 +402,12 @@ _aesni_decrypt6: .byte 102,15,56,223,240 .byte 102,15,56,223,248 .byte 0xf3,0xc3 +.cfi_endproc .size _aesni_decrypt6,.-_aesni_decrypt6 .type _aesni_encrypt8,@function .align 16 _aesni_encrypt8: +.cfi_startproc movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 @@ -440,10 +468,12 @@ _aesni_encrypt8: .byte 102,68,15,56,221,192 .byte 102,68,15,56,221,200 .byte 0xf3,0xc3 +.cfi_endproc .size _aesni_encrypt8,.-_aesni_encrypt8 .type _aesni_decrypt8,@function .align 16 _aesni_decrypt8: +.cfi_startproc movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 @@ -504,12 +534,14 @@ _aesni_decrypt8: .byte 102,68,15,56,223,192 .byte 102,68,15,56,223,200 .byte 0xf3,0xc3 +.cfi_endproc .size _aesni_decrypt8,.-_aesni_decrypt8 .globl aes_hw_ecb_encrypt .hidden aes_hw_ecb_encrypt .type aes_hw_ecb_encrypt,@function .align 16 aes_hw_ecb_encrypt: +.cfi_startproc andq $-16,%rdx jz .Lecb_ret @@ -847,175 +879,19 @@ aes_hw_ecb_encrypt: xorps %xmm0,%xmm0 pxor %xmm1,%xmm1 .byte 0xf3,0xc3 +.cfi_endproc .size aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt -.globl aes_hw_ccm64_encrypt_blocks -.hidden aes_hw_ccm64_encrypt_blocks -.type aes_hw_ccm64_encrypt_blocks,@function -.align 16 -aes_hw_ccm64_encrypt_blocks: - movl 240(%rcx),%eax - movdqu (%r8),%xmm6 - movdqa .Lincrement64(%rip),%xmm9 - movdqa .Lbswap_mask(%rip),%xmm7 - - shll $4,%eax - movl $16,%r10d - leaq 0(%rcx),%r11 - movdqu (%r9),%xmm3 - movdqa %xmm6,%xmm2 - leaq 32(%rcx,%rax,1),%rcx -.byte 102,15,56,0,247 - subq %rax,%r10 - jmp .Lccm64_enc_outer -.align 16 -.Lccm64_enc_outer: - movups (%r11),%xmm0 - movq %r10,%rax - movups (%rdi),%xmm8 - - xorps %xmm0,%xmm2 - movups 16(%r11),%xmm1 - xorps %xmm8,%xmm0 - xorps %xmm0,%xmm3 - movups 32(%r11),%xmm0 - -.Lccm64_enc2_loop: -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 - movups (%rcx,%rax,1),%xmm1 - addq $32,%rax -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - movups -16(%rcx,%rax,1),%xmm0 - jnz .Lccm64_enc2_loop -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 - paddq %xmm9,%xmm6 - decq %rdx -.byte 102,15,56,221,208 -.byte 102,15,56,221,216 - - leaq 16(%rdi),%rdi - xorps %xmm2,%xmm8 - movdqa %xmm6,%xmm2 - movups %xmm8,(%rsi) -.byte 102,15,56,0,215 - leaq 16(%rsi),%rsi - jnz .Lccm64_enc_outer - - pxor %xmm0,%xmm0 - pxor %xmm1,%xmm1 - pxor %xmm2,%xmm2 - movups %xmm3,(%r9) - pxor %xmm3,%xmm3 - pxor %xmm8,%xmm8 - pxor %xmm6,%xmm6 - .byte 
0xf3,0xc3 -.size aes_hw_ccm64_encrypt_blocks,.-aes_hw_ccm64_encrypt_blocks -.globl aes_hw_ccm64_decrypt_blocks -.hidden aes_hw_ccm64_decrypt_blocks -.type aes_hw_ccm64_decrypt_blocks,@function -.align 16 -aes_hw_ccm64_decrypt_blocks: - movl 240(%rcx),%eax - movups (%r8),%xmm6 - movdqu (%r9),%xmm3 - movdqa .Lincrement64(%rip),%xmm9 - movdqa .Lbswap_mask(%rip),%xmm7 - - movaps %xmm6,%xmm2 - movl %eax,%r10d - movq %rcx,%r11 -.byte 102,15,56,0,247 - movups (%rcx),%xmm0 - movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - xorps %xmm0,%xmm2 -.Loop_enc1_5: -.byte 102,15,56,220,209 - decl %eax - movups (%rcx),%xmm1 - leaq 16(%rcx),%rcx - jnz .Loop_enc1_5 -.byte 102,15,56,221,209 - shll $4,%r10d - movl $16,%eax - movups (%rdi),%xmm8 - paddq %xmm9,%xmm6 - leaq 16(%rdi),%rdi - subq %r10,%rax - leaq 32(%r11,%r10,1),%rcx - movq %rax,%r10 - jmp .Lccm64_dec_outer -.align 16 -.Lccm64_dec_outer: - xorps %xmm2,%xmm8 - movdqa %xmm6,%xmm2 - movups %xmm8,(%rsi) - leaq 16(%rsi),%rsi -.byte 102,15,56,0,215 - - subq $1,%rdx - jz .Lccm64_dec_break - - movups (%r11),%xmm0 - movq %r10,%rax - movups 16(%r11),%xmm1 - xorps %xmm0,%xmm8 - xorps %xmm0,%xmm2 - xorps %xmm8,%xmm3 - movups 32(%r11),%xmm0 - jmp .Lccm64_dec2_loop -.align 16 -.Lccm64_dec2_loop: -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 - movups (%rcx,%rax,1),%xmm1 - addq $32,%rax -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - movups -16(%rcx,%rax,1),%xmm0 - jnz .Lccm64_dec2_loop - movups (%rdi),%xmm8 - paddq %xmm9,%xmm6 -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 -.byte 102,15,56,221,208 -.byte 102,15,56,221,216 - leaq 16(%rdi),%rdi - jmp .Lccm64_dec_outer - -.align 16 -.Lccm64_dec_break: - - movl 240(%r11),%eax - movups (%r11),%xmm0 - movups 16(%r11),%xmm1 - xorps %xmm0,%xmm8 - leaq 32(%r11),%r11 - xorps %xmm8,%xmm3 -.Loop_enc1_6: -.byte 102,15,56,220,217 - decl %eax - movups (%r11),%xmm1 - leaq 16(%r11),%r11 - jnz .Loop_enc1_6 -.byte 102,15,56,221,217 - pxor %xmm0,%xmm0 - pxor %xmm1,%xmm1 - pxor %xmm2,%xmm2 - movups %xmm3,(%r9) - pxor %xmm3,%xmm3 - pxor %xmm8,%xmm8 - pxor %xmm6,%xmm6 - .byte 0xf3,0xc3 -.size aes_hw_ccm64_decrypt_blocks,.-aes_hw_ccm64_decrypt_blocks .globl aes_hw_ctr32_encrypt_blocks .hidden aes_hw_ctr32_encrypt_blocks .type aes_hw_ctr32_encrypt_blocks,@function .align 16 aes_hw_ctr32_encrypt_blocks: .cfi_startproc +#ifndef NDEBUG +#ifndef BORINGSSL_FIPS + movb $1,BORINGSSL_function_hit(%rip) +#endif +#endif cmpq $1,%rdx jne .Lctr32_bulk @@ -1028,12 +904,12 @@ aes_hw_ctr32_encrypt_blocks: movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 -.Loop_enc1_7: +.Loop_enc1_5: .byte 102,15,56,220,209 decl %edx movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_7 + jnz .Loop_enc1_5 .byte 102,15,56,221,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 @@ -1590,1839 +1466,6 @@ aes_hw_ctr32_encrypt_blocks: .byte 0xf3,0xc3 .cfi_endproc .size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks -.globl aes_hw_xts_encrypt -.hidden aes_hw_xts_encrypt -.type aes_hw_xts_encrypt,@function -.align 16 -aes_hw_xts_encrypt: -.cfi_startproc - leaq (%rsp),%r11 -.cfi_def_cfa_register %r11 - pushq %rbp -.cfi_offset %rbp,-16 - subq $112,%rsp - andq $-16,%rsp - movups (%r9),%xmm2 - movl 240(%r8),%eax - movl 240(%rcx),%r10d - movups (%r8),%xmm0 - movups 16(%r8),%xmm1 - leaq 32(%r8),%r8 - xorps %xmm0,%xmm2 -.Loop_enc1_8: -.byte 102,15,56,220,209 - decl %eax - movups (%r8),%xmm1 - leaq 16(%r8),%r8 - jnz .Loop_enc1_8 -.byte 102,15,56,221,209 - movups (%rcx),%xmm0 - movq %rcx,%rbp - movl %r10d,%eax - shll $4,%r10d - movq %rdx,%r9 - andq $-16,%rdx - - movups 
16(%rcx,%r10,1),%xmm1 - - movdqa .Lxts_magic(%rip),%xmm8 - movdqa %xmm2,%xmm15 - pshufd $0x5f,%xmm2,%xmm9 - pxor %xmm0,%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 - movdqa %xmm15,%xmm10 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm10 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 - movdqa %xmm15,%xmm11 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm11 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 - movdqa %xmm15,%xmm12 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm12 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 - movdqa %xmm15,%xmm13 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm13 - pxor %xmm14,%xmm15 - movdqa %xmm15,%xmm14 - psrad $31,%xmm9 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pxor %xmm0,%xmm14 - pxor %xmm9,%xmm15 - movaps %xmm1,96(%rsp) - - subq $96,%rdx - jc .Lxts_enc_short - - movl $16+96,%eax - leaq 32(%rbp,%r10,1),%rcx - subq %r10,%rax - movups 16(%rbp),%xmm1 - movq %rax,%r10 - leaq .Lxts_magic(%rip),%r8 - jmp .Lxts_enc_grandloop - -.align 32 -.Lxts_enc_grandloop: - movdqu 0(%rdi),%xmm2 - movdqa %xmm0,%xmm8 - movdqu 16(%rdi),%xmm3 - pxor %xmm10,%xmm2 - movdqu 32(%rdi),%xmm4 - pxor %xmm11,%xmm3 -.byte 102,15,56,220,209 - movdqu 48(%rdi),%xmm5 - pxor %xmm12,%xmm4 -.byte 102,15,56,220,217 - movdqu 64(%rdi),%xmm6 - pxor %xmm13,%xmm5 -.byte 102,15,56,220,225 - movdqu 80(%rdi),%xmm7 - pxor %xmm15,%xmm8 - movdqa 96(%rsp),%xmm9 - pxor %xmm14,%xmm6 -.byte 102,15,56,220,233 - movups 32(%rbp),%xmm0 - leaq 96(%rdi),%rdi - pxor %xmm8,%xmm7 - - pxor %xmm9,%xmm10 -.byte 102,15,56,220,241 - pxor %xmm9,%xmm11 - movdqa %xmm10,0(%rsp) -.byte 102,15,56,220,249 - movups 48(%rbp),%xmm1 - pxor %xmm9,%xmm12 - -.byte 102,15,56,220,208 - pxor %xmm9,%xmm13 - movdqa %xmm11,16(%rsp) -.byte 102,15,56,220,216 - pxor %xmm9,%xmm14 - movdqa %xmm12,32(%rsp) -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 - pxor %xmm9,%xmm8 - movdqa %xmm14,64(%rsp) -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 - movups 64(%rbp),%xmm0 - movdqa %xmm8,80(%rsp) - pshufd $0x5f,%xmm15,%xmm9 - jmp .Lxts_enc_loop6 -.align 32 -.Lxts_enc_loop6: -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups -64(%rcx,%rax,1),%xmm1 - addq $32,%rax - -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 - movups -80(%rcx,%rax,1),%xmm0 - jnz .Lxts_enc_loop6 - - movdqa (%r8),%xmm8 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,220,209 - paddq %xmm15,%xmm15 - psrad $31,%xmm14 -.byte 102,15,56,220,217 - pand %xmm8,%xmm14 - movups (%rbp),%xmm10 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 - pxor %xmm14,%xmm15 - movaps %xmm10,%xmm11 -.byte 102,15,56,220,249 - movups -64(%rcx),%xmm1 - - movdqa %xmm9,%xmm14 -.byte 102,15,56,220,208 - paddd %xmm9,%xmm9 - pxor %xmm15,%xmm10 -.byte 102,15,56,220,216 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 - pand %xmm8,%xmm14 - movaps %xmm11,%xmm12 -.byte 102,15,56,220,240 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 -.byte 102,15,56,220,248 - movups -48(%rcx),%xmm0 - - paddd %xmm9,%xmm9 -.byte 102,15,56,220,209 - pxor %xmm15,%xmm11 - psrad $31,%xmm14 -.byte 102,15,56,220,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 -.byte 102,15,56,220,225 -.byte 
102,15,56,220,233 - movdqa %xmm13,48(%rsp) - pxor %xmm14,%xmm15 -.byte 102,15,56,220,241 - movaps %xmm12,%xmm13 - movdqa %xmm9,%xmm14 -.byte 102,15,56,220,249 - movups -32(%rcx),%xmm1 - - paddd %xmm9,%xmm9 -.byte 102,15,56,220,208 - pxor %xmm15,%xmm12 - psrad $31,%xmm14 -.byte 102,15,56,220,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 -.byte 102,15,56,220,240 - pxor %xmm14,%xmm15 - movaps %xmm13,%xmm14 -.byte 102,15,56,220,248 - - movdqa %xmm9,%xmm0 - paddd %xmm9,%xmm9 -.byte 102,15,56,220,209 - pxor %xmm15,%xmm13 - psrad $31,%xmm0 -.byte 102,15,56,220,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm0 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 - pxor %xmm0,%xmm15 - movups (%rbp),%xmm0 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups 16(%rbp),%xmm1 - - pxor %xmm15,%xmm14 -.byte 102,15,56,221,84,36,0 - psrad $31,%xmm9 - paddq %xmm15,%xmm15 -.byte 102,15,56,221,92,36,16 -.byte 102,15,56,221,100,36,32 - pand %xmm8,%xmm9 - movq %r10,%rax -.byte 102,15,56,221,108,36,48 -.byte 102,15,56,221,116,36,64 -.byte 102,15,56,221,124,36,80 - pxor %xmm9,%xmm15 - - leaq 96(%rsi),%rsi - movups %xmm2,-96(%rsi) - movups %xmm3,-80(%rsi) - movups %xmm4,-64(%rsi) - movups %xmm5,-48(%rsi) - movups %xmm6,-32(%rsi) - movups %xmm7,-16(%rsi) - subq $96,%rdx - jnc .Lxts_enc_grandloop - - movl $16+96,%eax - subl %r10d,%eax - movq %rbp,%rcx - shrl $4,%eax - -.Lxts_enc_short: - - movl %eax,%r10d - pxor %xmm0,%xmm10 - addq $96,%rdx - jz .Lxts_enc_done - - pxor %xmm0,%xmm11 - cmpq $0x20,%rdx - jb .Lxts_enc_one - pxor %xmm0,%xmm12 - je .Lxts_enc_two - - pxor %xmm0,%xmm13 - cmpq $0x40,%rdx - jb .Lxts_enc_three - pxor %xmm0,%xmm14 - je .Lxts_enc_four - - movdqu (%rdi),%xmm2 - movdqu 16(%rdi),%xmm3 - movdqu 32(%rdi),%xmm4 - pxor %xmm10,%xmm2 - movdqu 48(%rdi),%xmm5 - pxor %xmm11,%xmm3 - movdqu 64(%rdi),%xmm6 - leaq 80(%rdi),%rdi - pxor %xmm12,%xmm4 - pxor %xmm13,%xmm5 - pxor %xmm14,%xmm6 - pxor %xmm7,%xmm7 - - call _aesni_encrypt6 - - xorps %xmm10,%xmm2 - movdqa %xmm15,%xmm10 - xorps %xmm11,%xmm3 - xorps %xmm12,%xmm4 - movdqu %xmm2,(%rsi) - xorps %xmm13,%xmm5 - movdqu %xmm3,16(%rsi) - xorps %xmm14,%xmm6 - movdqu %xmm4,32(%rsi) - movdqu %xmm5,48(%rsi) - movdqu %xmm6,64(%rsi) - leaq 80(%rsi),%rsi - jmp .Lxts_enc_done - -.align 16 -.Lxts_enc_one: - movups (%rdi),%xmm2 - leaq 16(%rdi),%rdi - xorps %xmm10,%xmm2 - movups (%rcx),%xmm0 - movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - xorps %xmm0,%xmm2 -.Loop_enc1_9: -.byte 102,15,56,220,209 - decl %eax - movups (%rcx),%xmm1 - leaq 16(%rcx),%rcx - jnz .Loop_enc1_9 -.byte 102,15,56,221,209 - xorps %xmm10,%xmm2 - movdqa %xmm11,%xmm10 - movups %xmm2,(%rsi) - leaq 16(%rsi),%rsi - jmp .Lxts_enc_done - -.align 16 -.Lxts_enc_two: - movups (%rdi),%xmm2 - movups 16(%rdi),%xmm3 - leaq 32(%rdi),%rdi - xorps %xmm10,%xmm2 - xorps %xmm11,%xmm3 - - call _aesni_encrypt2 - - xorps %xmm10,%xmm2 - movdqa %xmm12,%xmm10 - xorps %xmm11,%xmm3 - movups %xmm2,(%rsi) - movups %xmm3,16(%rsi) - leaq 32(%rsi),%rsi - jmp .Lxts_enc_done - -.align 16 -.Lxts_enc_three: - movups (%rdi),%xmm2 - movups 16(%rdi),%xmm3 - movups 32(%rdi),%xmm4 - leaq 48(%rdi),%rdi - xorps %xmm10,%xmm2 - xorps %xmm11,%xmm3 - xorps %xmm12,%xmm4 - - call _aesni_encrypt3 - - xorps %xmm10,%xmm2 - movdqa %xmm13,%xmm10 - xorps %xmm11,%xmm3 - xorps %xmm12,%xmm4 - movups %xmm2,(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - leaq 48(%rsi),%rsi - jmp .Lxts_enc_done - -.align 16 -.Lxts_enc_four: - movups (%rdi),%xmm2 - movups 16(%rdi),%xmm3 - movups 32(%rdi),%xmm4 - xorps %xmm10,%xmm2 
- movups 48(%rdi),%xmm5 - leaq 64(%rdi),%rdi - xorps %xmm11,%xmm3 - xorps %xmm12,%xmm4 - xorps %xmm13,%xmm5 - - call _aesni_encrypt4 - - pxor %xmm10,%xmm2 - movdqa %xmm14,%xmm10 - pxor %xmm11,%xmm3 - pxor %xmm12,%xmm4 - movdqu %xmm2,(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm3,16(%rsi) - movdqu %xmm4,32(%rsi) - movdqu %xmm5,48(%rsi) - leaq 64(%rsi),%rsi - jmp .Lxts_enc_done - -.align 16 -.Lxts_enc_done: - andq $15,%r9 - jz .Lxts_enc_ret - movq %r9,%rdx - -.Lxts_enc_steal: - movzbl (%rdi),%eax - movzbl -16(%rsi),%ecx - leaq 1(%rdi),%rdi - movb %al,-16(%rsi) - movb %cl,0(%rsi) - leaq 1(%rsi),%rsi - subq $1,%rdx - jnz .Lxts_enc_steal - - subq %r9,%rsi - movq %rbp,%rcx - movl %r10d,%eax - - movups -16(%rsi),%xmm2 - xorps %xmm10,%xmm2 - movups (%rcx),%xmm0 - movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - xorps %xmm0,%xmm2 -.Loop_enc1_10: -.byte 102,15,56,220,209 - decl %eax - movups (%rcx),%xmm1 - leaq 16(%rcx),%rcx - jnz .Loop_enc1_10 -.byte 102,15,56,221,209 - xorps %xmm10,%xmm2 - movups %xmm2,-16(%rsi) - -.Lxts_enc_ret: - xorps %xmm0,%xmm0 - pxor %xmm1,%xmm1 - pxor %xmm2,%xmm2 - pxor %xmm3,%xmm3 - pxor %xmm4,%xmm4 - pxor %xmm5,%xmm5 - pxor %xmm6,%xmm6 - pxor %xmm7,%xmm7 - movaps %xmm0,0(%rsp) - pxor %xmm8,%xmm8 - movaps %xmm0,16(%rsp) - pxor %xmm9,%xmm9 - movaps %xmm0,32(%rsp) - pxor %xmm10,%xmm10 - movaps %xmm0,48(%rsp) - pxor %xmm11,%xmm11 - movaps %xmm0,64(%rsp) - pxor %xmm12,%xmm12 - movaps %xmm0,80(%rsp) - pxor %xmm13,%xmm13 - movaps %xmm0,96(%rsp) - pxor %xmm14,%xmm14 - pxor %xmm15,%xmm15 - movq -8(%r11),%rbp -.cfi_restore %rbp - leaq (%r11),%rsp -.cfi_def_cfa_register %rsp -.Lxts_enc_epilogue: - .byte 0xf3,0xc3 -.cfi_endproc -.size aes_hw_xts_encrypt,.-aes_hw_xts_encrypt -.globl aes_hw_xts_decrypt -.hidden aes_hw_xts_decrypt -.type aes_hw_xts_decrypt,@function -.align 16 -aes_hw_xts_decrypt: -.cfi_startproc - leaq (%rsp),%r11 -.cfi_def_cfa_register %r11 - pushq %rbp -.cfi_offset %rbp,-16 - subq $112,%rsp - andq $-16,%rsp - movups (%r9),%xmm2 - movl 240(%r8),%eax - movl 240(%rcx),%r10d - movups (%r8),%xmm0 - movups 16(%r8),%xmm1 - leaq 32(%r8),%r8 - xorps %xmm0,%xmm2 -.Loop_enc1_11: -.byte 102,15,56,220,209 - decl %eax - movups (%r8),%xmm1 - leaq 16(%r8),%r8 - jnz .Loop_enc1_11 -.byte 102,15,56,221,209 - xorl %eax,%eax - testq $15,%rdx - setnz %al - shlq $4,%rax - subq %rax,%rdx - - movups (%rcx),%xmm0 - movq %rcx,%rbp - movl %r10d,%eax - shll $4,%r10d - movq %rdx,%r9 - andq $-16,%rdx - - movups 16(%rcx,%r10,1),%xmm1 - - movdqa .Lxts_magic(%rip),%xmm8 - movdqa %xmm2,%xmm15 - pshufd $0x5f,%xmm2,%xmm9 - pxor %xmm0,%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 - movdqa %xmm15,%xmm10 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm10 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 - movdqa %xmm15,%xmm11 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm11 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 - movdqa %xmm15,%xmm12 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm12 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 - movdqa %xmm15,%xmm13 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm13 - pxor %xmm14,%xmm15 - movdqa %xmm15,%xmm14 - psrad $31,%xmm9 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pxor %xmm0,%xmm14 - pxor %xmm9,%xmm15 - movaps %xmm1,96(%rsp) - - subq $96,%rdx - jc .Lxts_dec_short - - movl $16+96,%eax - leaq 32(%rbp,%r10,1),%rcx - subq %r10,%rax - movups 16(%rbp),%xmm1 - movq %rax,%r10 - leaq .Lxts_magic(%rip),%r8 - 
jmp .Lxts_dec_grandloop - -.align 32 -.Lxts_dec_grandloop: - movdqu 0(%rdi),%xmm2 - movdqa %xmm0,%xmm8 - movdqu 16(%rdi),%xmm3 - pxor %xmm10,%xmm2 - movdqu 32(%rdi),%xmm4 - pxor %xmm11,%xmm3 -.byte 102,15,56,222,209 - movdqu 48(%rdi),%xmm5 - pxor %xmm12,%xmm4 -.byte 102,15,56,222,217 - movdqu 64(%rdi),%xmm6 - pxor %xmm13,%xmm5 -.byte 102,15,56,222,225 - movdqu 80(%rdi),%xmm7 - pxor %xmm15,%xmm8 - movdqa 96(%rsp),%xmm9 - pxor %xmm14,%xmm6 -.byte 102,15,56,222,233 - movups 32(%rbp),%xmm0 - leaq 96(%rdi),%rdi - pxor %xmm8,%xmm7 - - pxor %xmm9,%xmm10 -.byte 102,15,56,222,241 - pxor %xmm9,%xmm11 - movdqa %xmm10,0(%rsp) -.byte 102,15,56,222,249 - movups 48(%rbp),%xmm1 - pxor %xmm9,%xmm12 - -.byte 102,15,56,222,208 - pxor %xmm9,%xmm13 - movdqa %xmm11,16(%rsp) -.byte 102,15,56,222,216 - pxor %xmm9,%xmm14 - movdqa %xmm12,32(%rsp) -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 - pxor %xmm9,%xmm8 - movdqa %xmm14,64(%rsp) -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 - movups 64(%rbp),%xmm0 - movdqa %xmm8,80(%rsp) - pshufd $0x5f,%xmm15,%xmm9 - jmp .Lxts_dec_loop6 -.align 32 -.Lxts_dec_loop6: -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups -64(%rcx,%rax,1),%xmm1 - addq $32,%rax - -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 - movups -80(%rcx,%rax,1),%xmm0 - jnz .Lxts_dec_loop6 - - movdqa (%r8),%xmm8 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,222,209 - paddq %xmm15,%xmm15 - psrad $31,%xmm14 -.byte 102,15,56,222,217 - pand %xmm8,%xmm14 - movups (%rbp),%xmm10 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 - pxor %xmm14,%xmm15 - movaps %xmm10,%xmm11 -.byte 102,15,56,222,249 - movups -64(%rcx),%xmm1 - - movdqa %xmm9,%xmm14 -.byte 102,15,56,222,208 - paddd %xmm9,%xmm9 - pxor %xmm15,%xmm10 -.byte 102,15,56,222,216 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 - pand %xmm8,%xmm14 - movaps %xmm11,%xmm12 -.byte 102,15,56,222,240 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 -.byte 102,15,56,222,248 - movups -48(%rcx),%xmm0 - - paddd %xmm9,%xmm9 -.byte 102,15,56,222,209 - pxor %xmm15,%xmm11 - psrad $31,%xmm14 -.byte 102,15,56,222,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 - movdqa %xmm13,48(%rsp) - pxor %xmm14,%xmm15 -.byte 102,15,56,222,241 - movaps %xmm12,%xmm13 - movdqa %xmm9,%xmm14 -.byte 102,15,56,222,249 - movups -32(%rcx),%xmm1 - - paddd %xmm9,%xmm9 -.byte 102,15,56,222,208 - pxor %xmm15,%xmm12 - psrad $31,%xmm14 -.byte 102,15,56,222,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 - pxor %xmm14,%xmm15 - movaps %xmm13,%xmm14 -.byte 102,15,56,222,248 - - movdqa %xmm9,%xmm0 - paddd %xmm9,%xmm9 -.byte 102,15,56,222,209 - pxor %xmm15,%xmm13 - psrad $31,%xmm0 -.byte 102,15,56,222,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm0 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 - pxor %xmm0,%xmm15 - movups (%rbp),%xmm0 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups 16(%rbp),%xmm1 - - pxor %xmm15,%xmm14 -.byte 102,15,56,223,84,36,0 - psrad $31,%xmm9 - paddq %xmm15,%xmm15 -.byte 102,15,56,223,92,36,16 -.byte 102,15,56,223,100,36,32 - pand %xmm8,%xmm9 - movq %r10,%rax -.byte 102,15,56,223,108,36,48 -.byte 102,15,56,223,116,36,64 -.byte 102,15,56,223,124,36,80 - pxor %xmm9,%xmm15 - 
- leaq 96(%rsi),%rsi - movups %xmm2,-96(%rsi) - movups %xmm3,-80(%rsi) - movups %xmm4,-64(%rsi) - movups %xmm5,-48(%rsi) - movups %xmm6,-32(%rsi) - movups %xmm7,-16(%rsi) - subq $96,%rdx - jnc .Lxts_dec_grandloop - - movl $16+96,%eax - subl %r10d,%eax - movq %rbp,%rcx - shrl $4,%eax - -.Lxts_dec_short: - - movl %eax,%r10d - pxor %xmm0,%xmm10 - pxor %xmm0,%xmm11 - addq $96,%rdx - jz .Lxts_dec_done - - pxor %xmm0,%xmm12 - cmpq $0x20,%rdx - jb .Lxts_dec_one - pxor %xmm0,%xmm13 - je .Lxts_dec_two - - pxor %xmm0,%xmm14 - cmpq $0x40,%rdx - jb .Lxts_dec_three - je .Lxts_dec_four - - movdqu (%rdi),%xmm2 - movdqu 16(%rdi),%xmm3 - movdqu 32(%rdi),%xmm4 - pxor %xmm10,%xmm2 - movdqu 48(%rdi),%xmm5 - pxor %xmm11,%xmm3 - movdqu 64(%rdi),%xmm6 - leaq 80(%rdi),%rdi - pxor %xmm12,%xmm4 - pxor %xmm13,%xmm5 - pxor %xmm14,%xmm6 - - call _aesni_decrypt6 - - xorps %xmm10,%xmm2 - xorps %xmm11,%xmm3 - xorps %xmm12,%xmm4 - movdqu %xmm2,(%rsi) - xorps %xmm13,%xmm5 - movdqu %xmm3,16(%rsi) - xorps %xmm14,%xmm6 - movdqu %xmm4,32(%rsi) - pxor %xmm14,%xmm14 - movdqu %xmm5,48(%rsi) - pcmpgtd %xmm15,%xmm14 - movdqu %xmm6,64(%rsi) - leaq 80(%rsi),%rsi - pshufd $0x13,%xmm14,%xmm11 - andq $15,%r9 - jz .Lxts_dec_ret - - movdqa %xmm15,%xmm10 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm11 - pxor %xmm15,%xmm11 - jmp .Lxts_dec_done2 - -.align 16 -.Lxts_dec_one: - movups (%rdi),%xmm2 - leaq 16(%rdi),%rdi - xorps %xmm10,%xmm2 - movups (%rcx),%xmm0 - movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - xorps %xmm0,%xmm2 -.Loop_dec1_12: -.byte 102,15,56,222,209 - decl %eax - movups (%rcx),%xmm1 - leaq 16(%rcx),%rcx - jnz .Loop_dec1_12 -.byte 102,15,56,223,209 - xorps %xmm10,%xmm2 - movdqa %xmm11,%xmm10 - movups %xmm2,(%rsi) - movdqa %xmm12,%xmm11 - leaq 16(%rsi),%rsi - jmp .Lxts_dec_done - -.align 16 -.Lxts_dec_two: - movups (%rdi),%xmm2 - movups 16(%rdi),%xmm3 - leaq 32(%rdi),%rdi - xorps %xmm10,%xmm2 - xorps %xmm11,%xmm3 - - call _aesni_decrypt2 - - xorps %xmm10,%xmm2 - movdqa %xmm12,%xmm10 - xorps %xmm11,%xmm3 - movdqa %xmm13,%xmm11 - movups %xmm2,(%rsi) - movups %xmm3,16(%rsi) - leaq 32(%rsi),%rsi - jmp .Lxts_dec_done - -.align 16 -.Lxts_dec_three: - movups (%rdi),%xmm2 - movups 16(%rdi),%xmm3 - movups 32(%rdi),%xmm4 - leaq 48(%rdi),%rdi - xorps %xmm10,%xmm2 - xorps %xmm11,%xmm3 - xorps %xmm12,%xmm4 - - call _aesni_decrypt3 - - xorps %xmm10,%xmm2 - movdqa %xmm13,%xmm10 - xorps %xmm11,%xmm3 - movdqa %xmm14,%xmm11 - xorps %xmm12,%xmm4 - movups %xmm2,(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - leaq 48(%rsi),%rsi - jmp .Lxts_dec_done - -.align 16 -.Lxts_dec_four: - movups (%rdi),%xmm2 - movups 16(%rdi),%xmm3 - movups 32(%rdi),%xmm4 - xorps %xmm10,%xmm2 - movups 48(%rdi),%xmm5 - leaq 64(%rdi),%rdi - xorps %xmm11,%xmm3 - xorps %xmm12,%xmm4 - xorps %xmm13,%xmm5 - - call _aesni_decrypt4 - - pxor %xmm10,%xmm2 - movdqa %xmm14,%xmm10 - pxor %xmm11,%xmm3 - movdqa %xmm15,%xmm11 - pxor %xmm12,%xmm4 - movdqu %xmm2,(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm3,16(%rsi) - movdqu %xmm4,32(%rsi) - movdqu %xmm5,48(%rsi) - leaq 64(%rsi),%rsi - jmp .Lxts_dec_done - -.align 16 -.Lxts_dec_done: - andq $15,%r9 - jz .Lxts_dec_ret -.Lxts_dec_done2: - movq %r9,%rdx - movq %rbp,%rcx - movl %r10d,%eax - - movups (%rdi),%xmm2 - xorps %xmm11,%xmm2 - movups (%rcx),%xmm0 - movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - xorps %xmm0,%xmm2 -.Loop_dec1_13: -.byte 102,15,56,222,209 - decl %eax - movups (%rcx),%xmm1 - leaq 16(%rcx),%rcx - jnz .Loop_dec1_13 -.byte 102,15,56,223,209 - xorps %xmm11,%xmm2 - movups %xmm2,(%rsi) - -.Lxts_dec_steal: - movzbl 16(%rdi),%eax - movzbl 
(%rsi),%ecx - leaq 1(%rdi),%rdi - movb %al,(%rsi) - movb %cl,16(%rsi) - leaq 1(%rsi),%rsi - subq $1,%rdx - jnz .Lxts_dec_steal - - subq %r9,%rsi - movq %rbp,%rcx - movl %r10d,%eax - - movups (%rsi),%xmm2 - xorps %xmm10,%xmm2 - movups (%rcx),%xmm0 - movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - xorps %xmm0,%xmm2 -.Loop_dec1_14: -.byte 102,15,56,222,209 - decl %eax - movups (%rcx),%xmm1 - leaq 16(%rcx),%rcx - jnz .Loop_dec1_14 -.byte 102,15,56,223,209 - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) - -.Lxts_dec_ret: - xorps %xmm0,%xmm0 - pxor %xmm1,%xmm1 - pxor %xmm2,%xmm2 - pxor %xmm3,%xmm3 - pxor %xmm4,%xmm4 - pxor %xmm5,%xmm5 - pxor %xmm6,%xmm6 - pxor %xmm7,%xmm7 - movaps %xmm0,0(%rsp) - pxor %xmm8,%xmm8 - movaps %xmm0,16(%rsp) - pxor %xmm9,%xmm9 - movaps %xmm0,32(%rsp) - pxor %xmm10,%xmm10 - movaps %xmm0,48(%rsp) - pxor %xmm11,%xmm11 - movaps %xmm0,64(%rsp) - pxor %xmm12,%xmm12 - movaps %xmm0,80(%rsp) - pxor %xmm13,%xmm13 - movaps %xmm0,96(%rsp) - pxor %xmm14,%xmm14 - pxor %xmm15,%xmm15 - movq -8(%r11),%rbp -.cfi_restore %rbp - leaq (%r11),%rsp -.cfi_def_cfa_register %rsp -.Lxts_dec_epilogue: - .byte 0xf3,0xc3 -.cfi_endproc -.size aes_hw_xts_decrypt,.-aes_hw_xts_decrypt -.globl aes_hw_ocb_encrypt -.hidden aes_hw_ocb_encrypt -.type aes_hw_ocb_encrypt,@function -.align 32 -aes_hw_ocb_encrypt: -.cfi_startproc - leaq (%rsp),%rax - pushq %rbx -.cfi_adjust_cfa_offset 8 -.cfi_offset %rbx,-16 - pushq %rbp -.cfi_adjust_cfa_offset 8 -.cfi_offset %rbp,-24 - pushq %r12 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r12,-32 - pushq %r13 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r13,-40 - pushq %r14 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r14,-48 - movq 8(%rax),%rbx - movq 8+8(%rax),%rbp - - movl 240(%rcx),%r10d - movq %rcx,%r11 - shll $4,%r10d - movups (%rcx),%xmm9 - movups 16(%rcx,%r10,1),%xmm1 - - movdqu (%r9),%xmm15 - pxor %xmm1,%xmm9 - pxor %xmm1,%xmm15 - - movl $16+32,%eax - leaq 32(%r11,%r10,1),%rcx - movups 16(%r11),%xmm1 - subq %r10,%rax - movq %rax,%r10 - - movdqu (%rbx),%xmm10 - movdqu (%rbp),%xmm8 - - testq $1,%r8 - jnz .Locb_enc_odd - - bsfq %r8,%r12 - addq $1,%r8 - shlq $4,%r12 - movdqu (%rbx,%r12,1),%xmm7 - movdqu (%rdi),%xmm2 - leaq 16(%rdi),%rdi - - call __ocb_encrypt1 - - movdqa %xmm7,%xmm15 - movups %xmm2,(%rsi) - leaq 16(%rsi),%rsi - subq $1,%rdx - jz .Locb_enc_done - -.Locb_enc_odd: - leaq 1(%r8),%r12 - leaq 3(%r8),%r13 - leaq 5(%r8),%r14 - leaq 6(%r8),%r8 - bsfq %r12,%r12 - bsfq %r13,%r13 - bsfq %r14,%r14 - shlq $4,%r12 - shlq $4,%r13 - shlq $4,%r14 - - subq $6,%rdx - jc .Locb_enc_short - jmp .Locb_enc_grandloop - -.align 32 -.Locb_enc_grandloop: - movdqu 0(%rdi),%xmm2 - movdqu 16(%rdi),%xmm3 - movdqu 32(%rdi),%xmm4 - movdqu 48(%rdi),%xmm5 - movdqu 64(%rdi),%xmm6 - movdqu 80(%rdi),%xmm7 - leaq 96(%rdi),%rdi - - call __ocb_encrypt6 - - movups %xmm2,0(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) - movups %xmm6,64(%rsi) - movups %xmm7,80(%rsi) - leaq 96(%rsi),%rsi - subq $6,%rdx - jnc .Locb_enc_grandloop - -.Locb_enc_short: - addq $6,%rdx - jz .Locb_enc_done - - movdqu 0(%rdi),%xmm2 - cmpq $2,%rdx - jb .Locb_enc_one - movdqu 16(%rdi),%xmm3 - je .Locb_enc_two - - movdqu 32(%rdi),%xmm4 - cmpq $4,%rdx - jb .Locb_enc_three - movdqu 48(%rdi),%xmm5 - je .Locb_enc_four - - movdqu 64(%rdi),%xmm6 - pxor %xmm7,%xmm7 - - call __ocb_encrypt6 - - movdqa %xmm14,%xmm15 - movups %xmm2,0(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) - movups %xmm6,64(%rsi) - - jmp .Locb_enc_done - -.align 16 -.Locb_enc_one: - movdqa %xmm10,%xmm7 - - call 
__ocb_encrypt1 - - movdqa %xmm7,%xmm15 - movups %xmm2,0(%rsi) - jmp .Locb_enc_done - -.align 16 -.Locb_enc_two: - pxor %xmm4,%xmm4 - pxor %xmm5,%xmm5 - - call __ocb_encrypt4 - - movdqa %xmm11,%xmm15 - movups %xmm2,0(%rsi) - movups %xmm3,16(%rsi) - - jmp .Locb_enc_done - -.align 16 -.Locb_enc_three: - pxor %xmm5,%xmm5 - - call __ocb_encrypt4 - - movdqa %xmm12,%xmm15 - movups %xmm2,0(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - - jmp .Locb_enc_done - -.align 16 -.Locb_enc_four: - call __ocb_encrypt4 - - movdqa %xmm13,%xmm15 - movups %xmm2,0(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) - -.Locb_enc_done: - pxor %xmm0,%xmm15 - movdqu %xmm8,(%rbp) - movdqu %xmm15,(%r9) - - xorps %xmm0,%xmm0 - pxor %xmm1,%xmm1 - pxor %xmm2,%xmm2 - pxor %xmm3,%xmm3 - pxor %xmm4,%xmm4 - pxor %xmm5,%xmm5 - pxor %xmm6,%xmm6 - pxor %xmm7,%xmm7 - pxor %xmm8,%xmm8 - pxor %xmm9,%xmm9 - pxor %xmm10,%xmm10 - pxor %xmm11,%xmm11 - pxor %xmm12,%xmm12 - pxor %xmm13,%xmm13 - pxor %xmm14,%xmm14 - pxor %xmm15,%xmm15 - leaq 40(%rsp),%rax -.cfi_def_cfa %rax,8 - movq -40(%rax),%r14 -.cfi_restore %r14 - movq -32(%rax),%r13 -.cfi_restore %r13 - movq -24(%rax),%r12 -.cfi_restore %r12 - movq -16(%rax),%rbp -.cfi_restore %rbp - movq -8(%rax),%rbx -.cfi_restore %rbx - leaq (%rax),%rsp -.cfi_def_cfa_register %rsp -.Locb_enc_epilogue: - .byte 0xf3,0xc3 -.cfi_endproc -.size aes_hw_ocb_encrypt,.-aes_hw_ocb_encrypt - -.type __ocb_encrypt6,@function -.align 32 -__ocb_encrypt6: - pxor %xmm9,%xmm15 - movdqu (%rbx,%r12,1),%xmm11 - movdqa %xmm10,%xmm12 - movdqu (%rbx,%r13,1),%xmm13 - movdqa %xmm10,%xmm14 - pxor %xmm15,%xmm10 - movdqu (%rbx,%r14,1),%xmm15 - pxor %xmm10,%xmm11 - pxor %xmm2,%xmm8 - pxor %xmm10,%xmm2 - pxor %xmm11,%xmm12 - pxor %xmm3,%xmm8 - pxor %xmm11,%xmm3 - pxor %xmm12,%xmm13 - pxor %xmm4,%xmm8 - pxor %xmm12,%xmm4 - pxor %xmm13,%xmm14 - pxor %xmm5,%xmm8 - pxor %xmm13,%xmm5 - pxor %xmm14,%xmm15 - pxor %xmm6,%xmm8 - pxor %xmm14,%xmm6 - pxor %xmm7,%xmm8 - pxor %xmm15,%xmm7 - movups 32(%r11),%xmm0 - - leaq 1(%r8),%r12 - leaq 3(%r8),%r13 - leaq 5(%r8),%r14 - addq $6,%r8 - pxor %xmm9,%xmm10 - bsfq %r12,%r12 - bsfq %r13,%r13 - bsfq %r14,%r14 - -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 - pxor %xmm9,%xmm11 - pxor %xmm9,%xmm12 -.byte 102,15,56,220,241 - pxor %xmm9,%xmm13 - pxor %xmm9,%xmm14 -.byte 102,15,56,220,249 - movups 48(%r11),%xmm1 - pxor %xmm9,%xmm15 - -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 - movups 64(%r11),%xmm0 - shlq $4,%r12 - shlq $4,%r13 - jmp .Locb_enc_loop6 - -.align 32 -.Locb_enc_loop6: -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups (%rcx,%rax,1),%xmm1 - addq $32,%rax - -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 - movups -16(%rcx,%rax,1),%xmm0 - jnz .Locb_enc_loop6 - -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups 16(%r11),%xmm1 - shlq $4,%r14 - -.byte 102,65,15,56,221,210 - movdqu (%rbx),%xmm10 - movq %r10,%rax -.byte 102,65,15,56,221,219 -.byte 102,65,15,56,221,228 -.byte 102,65,15,56,221,237 -.byte 102,65,15,56,221,246 -.byte 102,65,15,56,221,255 - .byte 0xf3,0xc3 -.size 
__ocb_encrypt6,.-__ocb_encrypt6 - -.type __ocb_encrypt4,@function -.align 32 -__ocb_encrypt4: - pxor %xmm9,%xmm15 - movdqu (%rbx,%r12,1),%xmm11 - movdqa %xmm10,%xmm12 - movdqu (%rbx,%r13,1),%xmm13 - pxor %xmm15,%xmm10 - pxor %xmm10,%xmm11 - pxor %xmm2,%xmm8 - pxor %xmm10,%xmm2 - pxor %xmm11,%xmm12 - pxor %xmm3,%xmm8 - pxor %xmm11,%xmm3 - pxor %xmm12,%xmm13 - pxor %xmm4,%xmm8 - pxor %xmm12,%xmm4 - pxor %xmm5,%xmm8 - pxor %xmm13,%xmm5 - movups 32(%r11),%xmm0 - - pxor %xmm9,%xmm10 - pxor %xmm9,%xmm11 - pxor %xmm9,%xmm12 - pxor %xmm9,%xmm13 - -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 - movups 48(%r11),%xmm1 - -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 - movups 64(%r11),%xmm0 - jmp .Locb_enc_loop4 - -.align 32 -.Locb_enc_loop4: -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 - movups (%rcx,%rax,1),%xmm1 - addq $32,%rax - -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 - movups -16(%rcx,%rax,1),%xmm0 - jnz .Locb_enc_loop4 - -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 - movups 16(%r11),%xmm1 - movq %r10,%rax - -.byte 102,65,15,56,221,210 -.byte 102,65,15,56,221,219 -.byte 102,65,15,56,221,228 -.byte 102,65,15,56,221,237 - .byte 0xf3,0xc3 -.size __ocb_encrypt4,.-__ocb_encrypt4 - -.type __ocb_encrypt1,@function -.align 32 -__ocb_encrypt1: - pxor %xmm15,%xmm7 - pxor %xmm9,%xmm7 - pxor %xmm2,%xmm8 - pxor %xmm7,%xmm2 - movups 32(%r11),%xmm0 - -.byte 102,15,56,220,209 - movups 48(%r11),%xmm1 - pxor %xmm9,%xmm7 - -.byte 102,15,56,220,208 - movups 64(%r11),%xmm0 - jmp .Locb_enc_loop1 - -.align 32 -.Locb_enc_loop1: -.byte 102,15,56,220,209 - movups (%rcx,%rax,1),%xmm1 - addq $32,%rax - -.byte 102,15,56,220,208 - movups -16(%rcx,%rax,1),%xmm0 - jnz .Locb_enc_loop1 - -.byte 102,15,56,220,209 - movups 16(%r11),%xmm1 - movq %r10,%rax - -.byte 102,15,56,221,215 - .byte 0xf3,0xc3 -.size __ocb_encrypt1,.-__ocb_encrypt1 - -.globl aes_hw_ocb_decrypt -.hidden aes_hw_ocb_decrypt -.type aes_hw_ocb_decrypt,@function -.align 32 -aes_hw_ocb_decrypt: -.cfi_startproc - leaq (%rsp),%rax - pushq %rbx -.cfi_adjust_cfa_offset 8 -.cfi_offset %rbx,-16 - pushq %rbp -.cfi_adjust_cfa_offset 8 -.cfi_offset %rbp,-24 - pushq %r12 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r12,-32 - pushq %r13 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r13,-40 - pushq %r14 -.cfi_adjust_cfa_offset 8 -.cfi_offset %r14,-48 - movq 8(%rax),%rbx - movq 8+8(%rax),%rbp - - movl 240(%rcx),%r10d - movq %rcx,%r11 - shll $4,%r10d - movups (%rcx),%xmm9 - movups 16(%rcx,%r10,1),%xmm1 - - movdqu (%r9),%xmm15 - pxor %xmm1,%xmm9 - pxor %xmm1,%xmm15 - - movl $16+32,%eax - leaq 32(%r11,%r10,1),%rcx - movups 16(%r11),%xmm1 - subq %r10,%rax - movq %rax,%r10 - - movdqu (%rbx),%xmm10 - movdqu (%rbp),%xmm8 - - testq $1,%r8 - jnz .Locb_dec_odd - - bsfq %r8,%r12 - addq $1,%r8 - shlq $4,%r12 - movdqu (%rbx,%r12,1),%xmm7 - movdqu (%rdi),%xmm2 - leaq 16(%rdi),%rdi - - call __ocb_decrypt1 - - movdqa %xmm7,%xmm15 - movups %xmm2,(%rsi) - xorps %xmm2,%xmm8 - leaq 16(%rsi),%rsi - subq $1,%rdx - jz .Locb_dec_done - -.Locb_dec_odd: - leaq 1(%r8),%r12 - leaq 3(%r8),%r13 - leaq 5(%r8),%r14 - leaq 6(%r8),%r8 - bsfq %r12,%r12 - bsfq %r13,%r13 - bsfq %r14,%r14 - shlq $4,%r12 - shlq $4,%r13 - shlq $4,%r14 - - subq $6,%rdx - jc .Locb_dec_short - jmp .Locb_dec_grandloop - -.align 32 -.Locb_dec_grandloop: - movdqu 0(%rdi),%xmm2 - 
movdqu 16(%rdi),%xmm3 - movdqu 32(%rdi),%xmm4 - movdqu 48(%rdi),%xmm5 - movdqu 64(%rdi),%xmm6 - movdqu 80(%rdi),%xmm7 - leaq 96(%rdi),%rdi - - call __ocb_decrypt6 - - movups %xmm2,0(%rsi) - pxor %xmm2,%xmm8 - movups %xmm3,16(%rsi) - pxor %xmm3,%xmm8 - movups %xmm4,32(%rsi) - pxor %xmm4,%xmm8 - movups %xmm5,48(%rsi) - pxor %xmm5,%xmm8 - movups %xmm6,64(%rsi) - pxor %xmm6,%xmm8 - movups %xmm7,80(%rsi) - pxor %xmm7,%xmm8 - leaq 96(%rsi),%rsi - subq $6,%rdx - jnc .Locb_dec_grandloop - -.Locb_dec_short: - addq $6,%rdx - jz .Locb_dec_done - - movdqu 0(%rdi),%xmm2 - cmpq $2,%rdx - jb .Locb_dec_one - movdqu 16(%rdi),%xmm3 - je .Locb_dec_two - - movdqu 32(%rdi),%xmm4 - cmpq $4,%rdx - jb .Locb_dec_three - movdqu 48(%rdi),%xmm5 - je .Locb_dec_four - - movdqu 64(%rdi),%xmm6 - pxor %xmm7,%xmm7 - - call __ocb_decrypt6 - - movdqa %xmm14,%xmm15 - movups %xmm2,0(%rsi) - pxor %xmm2,%xmm8 - movups %xmm3,16(%rsi) - pxor %xmm3,%xmm8 - movups %xmm4,32(%rsi) - pxor %xmm4,%xmm8 - movups %xmm5,48(%rsi) - pxor %xmm5,%xmm8 - movups %xmm6,64(%rsi) - pxor %xmm6,%xmm8 - - jmp .Locb_dec_done - -.align 16 -.Locb_dec_one: - movdqa %xmm10,%xmm7 - - call __ocb_decrypt1 - - movdqa %xmm7,%xmm15 - movups %xmm2,0(%rsi) - xorps %xmm2,%xmm8 - jmp .Locb_dec_done - -.align 16 -.Locb_dec_two: - pxor %xmm4,%xmm4 - pxor %xmm5,%xmm5 - - call __ocb_decrypt4 - - movdqa %xmm11,%xmm15 - movups %xmm2,0(%rsi) - xorps %xmm2,%xmm8 - movups %xmm3,16(%rsi) - xorps %xmm3,%xmm8 - - jmp .Locb_dec_done - -.align 16 -.Locb_dec_three: - pxor %xmm5,%xmm5 - - call __ocb_decrypt4 - - movdqa %xmm12,%xmm15 - movups %xmm2,0(%rsi) - xorps %xmm2,%xmm8 - movups %xmm3,16(%rsi) - xorps %xmm3,%xmm8 - movups %xmm4,32(%rsi) - xorps %xmm4,%xmm8 - - jmp .Locb_dec_done - -.align 16 -.Locb_dec_four: - call __ocb_decrypt4 - - movdqa %xmm13,%xmm15 - movups %xmm2,0(%rsi) - pxor %xmm2,%xmm8 - movups %xmm3,16(%rsi) - pxor %xmm3,%xmm8 - movups %xmm4,32(%rsi) - pxor %xmm4,%xmm8 - movups %xmm5,48(%rsi) - pxor %xmm5,%xmm8 - -.Locb_dec_done: - pxor %xmm0,%xmm15 - movdqu %xmm8,(%rbp) - movdqu %xmm15,(%r9) - - xorps %xmm0,%xmm0 - pxor %xmm1,%xmm1 - pxor %xmm2,%xmm2 - pxor %xmm3,%xmm3 - pxor %xmm4,%xmm4 - pxor %xmm5,%xmm5 - pxor %xmm6,%xmm6 - pxor %xmm7,%xmm7 - pxor %xmm8,%xmm8 - pxor %xmm9,%xmm9 - pxor %xmm10,%xmm10 - pxor %xmm11,%xmm11 - pxor %xmm12,%xmm12 - pxor %xmm13,%xmm13 - pxor %xmm14,%xmm14 - pxor %xmm15,%xmm15 - leaq 40(%rsp),%rax -.cfi_def_cfa %rax,8 - movq -40(%rax),%r14 -.cfi_restore %r14 - movq -32(%rax),%r13 -.cfi_restore %r13 - movq -24(%rax),%r12 -.cfi_restore %r12 - movq -16(%rax),%rbp -.cfi_restore %rbp - movq -8(%rax),%rbx -.cfi_restore %rbx - leaq (%rax),%rsp -.cfi_def_cfa_register %rsp -.Locb_dec_epilogue: - .byte 0xf3,0xc3 -.cfi_endproc -.size aes_hw_ocb_decrypt,.-aes_hw_ocb_decrypt - -.type __ocb_decrypt6,@function -.align 32 -__ocb_decrypt6: - pxor %xmm9,%xmm15 - movdqu (%rbx,%r12,1),%xmm11 - movdqa %xmm10,%xmm12 - movdqu (%rbx,%r13,1),%xmm13 - movdqa %xmm10,%xmm14 - pxor %xmm15,%xmm10 - movdqu (%rbx,%r14,1),%xmm15 - pxor %xmm10,%xmm11 - pxor %xmm10,%xmm2 - pxor %xmm11,%xmm12 - pxor %xmm11,%xmm3 - pxor %xmm12,%xmm13 - pxor %xmm12,%xmm4 - pxor %xmm13,%xmm14 - pxor %xmm13,%xmm5 - pxor %xmm14,%xmm15 - pxor %xmm14,%xmm6 - pxor %xmm15,%xmm7 - movups 32(%r11),%xmm0 - - leaq 1(%r8),%r12 - leaq 3(%r8),%r13 - leaq 5(%r8),%r14 - addq $6,%r8 - pxor %xmm9,%xmm10 - bsfq %r12,%r12 - bsfq %r13,%r13 - bsfq %r14,%r14 - -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 - pxor %xmm9,%xmm11 - pxor %xmm9,%xmm12 -.byte 
102,15,56,222,241 - pxor %xmm9,%xmm13 - pxor %xmm9,%xmm14 -.byte 102,15,56,222,249 - movups 48(%r11),%xmm1 - pxor %xmm9,%xmm15 - -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 - movups 64(%r11),%xmm0 - shlq $4,%r12 - shlq $4,%r13 - jmp .Locb_dec_loop6 - -.align 32 -.Locb_dec_loop6: -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups (%rcx,%rax,1),%xmm1 - addq $32,%rax - -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 - movups -16(%rcx,%rax,1),%xmm0 - jnz .Locb_dec_loop6 - -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups 16(%r11),%xmm1 - shlq $4,%r14 - -.byte 102,65,15,56,223,210 - movdqu (%rbx),%xmm10 - movq %r10,%rax -.byte 102,65,15,56,223,219 -.byte 102,65,15,56,223,228 -.byte 102,65,15,56,223,237 -.byte 102,65,15,56,223,246 -.byte 102,65,15,56,223,255 - .byte 0xf3,0xc3 -.size __ocb_decrypt6,.-__ocb_decrypt6 - -.type __ocb_decrypt4,@function -.align 32 -__ocb_decrypt4: - pxor %xmm9,%xmm15 - movdqu (%rbx,%r12,1),%xmm11 - movdqa %xmm10,%xmm12 - movdqu (%rbx,%r13,1),%xmm13 - pxor %xmm15,%xmm10 - pxor %xmm10,%xmm11 - pxor %xmm10,%xmm2 - pxor %xmm11,%xmm12 - pxor %xmm11,%xmm3 - pxor %xmm12,%xmm13 - pxor %xmm12,%xmm4 - pxor %xmm13,%xmm5 - movups 32(%r11),%xmm0 - - pxor %xmm9,%xmm10 - pxor %xmm9,%xmm11 - pxor %xmm9,%xmm12 - pxor %xmm9,%xmm13 - -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 - movups 48(%r11),%xmm1 - -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 - movups 64(%r11),%xmm0 - jmp .Locb_dec_loop4 - -.align 32 -.Locb_dec_loop4: -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 - movups (%rcx,%rax,1),%xmm1 - addq $32,%rax - -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 - movups -16(%rcx,%rax,1),%xmm0 - jnz .Locb_dec_loop4 - -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 - movups 16(%r11),%xmm1 - movq %r10,%rax - -.byte 102,65,15,56,223,210 -.byte 102,65,15,56,223,219 -.byte 102,65,15,56,223,228 -.byte 102,65,15,56,223,237 - .byte 0xf3,0xc3 -.size __ocb_decrypt4,.-__ocb_decrypt4 - -.type __ocb_decrypt1,@function -.align 32 -__ocb_decrypt1: - pxor %xmm15,%xmm7 - pxor %xmm9,%xmm7 - pxor %xmm7,%xmm2 - movups 32(%r11),%xmm0 - -.byte 102,15,56,222,209 - movups 48(%r11),%xmm1 - pxor %xmm9,%xmm7 - -.byte 102,15,56,222,208 - movups 64(%r11),%xmm0 - jmp .Locb_dec_loop1 - -.align 32 -.Locb_dec_loop1: -.byte 102,15,56,222,209 - movups (%rcx,%rax,1),%xmm1 - addq $32,%rax - -.byte 102,15,56,222,208 - movups -16(%rcx,%rax,1),%xmm0 - jnz .Locb_dec_loop1 - -.byte 102,15,56,222,209 - movups 16(%r11),%xmm1 - movq %r10,%rax - -.byte 102,15,56,223,215 - .byte 0xf3,0xc3 -.size __ocb_decrypt1,.-__ocb_decrypt1 .globl aes_hw_cbc_encrypt .hidden aes_hw_cbc_encrypt .type aes_hw_cbc_encrypt,@function @@ -3453,12 +1496,12 @@ aes_hw_cbc_encrypt: xorps %xmm0,%xmm3 leaq 32(%rcx),%rcx xorps %xmm3,%xmm2 -.Loop_enc1_15: +.Loop_enc1_6: .byte 102,15,56,220,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_15 + jnz .Loop_enc1_6 
.byte 102,15,56,221,209 movl %r10d,%eax movq %r11,%rcx @@ -3504,12 +1547,12 @@ aes_hw_cbc_encrypt: movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 -.Loop_dec1_16: +.Loop_dec1_7: .byte 102,15,56,222,209 decl %r10d movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_16 + jnz .Loop_dec1_7 .byte 102,15,56,223,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 @@ -3922,12 +1965,12 @@ aes_hw_cbc_encrypt: movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 -.Loop_dec1_17: +.Loop_dec1_8: .byte 102,15,56,222,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_17 + jnz .Loop_dec1_8 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movaps %xmm11,%xmm10 @@ -4068,6 +2111,11 @@ aes_hw_set_decrypt_key: aes_hw_set_encrypt_key: __aesni_set_encrypt_key: .cfi_startproc +#ifndef NDEBUG +#ifndef BORINGSSL_FIPS + movb $1,BORINGSSL_function_hit+3(%rip) +#endif +#endif .byte 0x48,0x83,0xEC,0x08 .cfi_adjust_cfa_offset 8 movq $-1,%rax diff --git a/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S b/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S index 36c01ef9..5236aa66 100644 --- a/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S +++ b/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S @@ -21,6 +21,7 @@ .type _bsaes_encrypt8,@function .align 64 _bsaes_encrypt8: +.cfi_startproc leaq .LBS0(%rip),%r11 movdqa (%rax),%xmm8 @@ -488,11 +489,13 @@ _bsaes_encrypt8_bitslice: pxor %xmm7,%xmm15 pxor %xmm7,%xmm0 .byte 0xf3,0xc3 +.cfi_endproc .size _bsaes_encrypt8,.-_bsaes_encrypt8 .type _bsaes_decrypt8,@function .align 64 _bsaes_decrypt8: +.cfi_startproc leaq .LBS0(%rip),%r11 movdqa (%rax),%xmm8 @@ -994,10 +997,12 @@ _bsaes_decrypt8: pxor %xmm7,%xmm15 pxor %xmm7,%xmm0 .byte 0xf3,0xc3 +.cfi_endproc .size _bsaes_decrypt8,.-_bsaes_decrypt8 .type _bsaes_key_convert,@function .align 16 _bsaes_key_convert: +.cfi_startproc leaq .Lmasks(%rip),%r11 movdqu (%rcx),%xmm7 leaq 16(%rcx),%rcx @@ -1076,6 +1081,7 @@ _bsaes_key_convert: movdqa 80(%r11),%xmm7 .byte 0xf3,0xc3 +.cfi_endproc .size _bsaes_key_convert,.-_bsaes_key_convert .extern aes_nohw_cbc_encrypt .hidden aes_nohw_cbc_encrypt @@ -1359,6 +1365,13 @@ bsaes_cbc_encrypt: .align 16 bsaes_ctr32_encrypt_blocks: .cfi_startproc +#ifndef NDEBUG +#ifndef BORINGSSL_FIPS +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+6(%rip) +#endif +#endif movq %rsp,%rax .Lctr_enc_prologue: pushq %rbp diff --git a/linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S b/linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S new file mode 100644 index 00000000..ecf5b66f --- /dev/null +++ b/linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S @@ -0,0 +1,426 @@ +# This file is generated from a similarly-named Perl script in the BoringSSL +# source tree. Do not edit by hand. 
+ +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +#define OPENSSL_NO_ASM +#endif +#endif + +#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +#if defined(BORINGSSL_PREFIX) +#include <boringssl_prefix_symbols_asm.h> +#endif +.text + + + + + +.type gcm_gmult_ssse3, @function +.globl gcm_gmult_ssse3 +.hidden gcm_gmult_ssse3 +.align 16 +gcm_gmult_ssse3: +.cfi_startproc +.Lgmult_seh_begin: + movdqu (%rdi),%xmm0 + movdqa .Lreverse_bytes(%rip),%xmm10 + movdqa .Llow4_mask(%rip),%xmm2 + + +.byte 102,65,15,56,0,194 + + + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + + + + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + movq $5,%rax +.Loop_row_1: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_1 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $5,%rax +.Loop_row_2: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_2 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $6,%rax +.Loop_row_3: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_3 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + +.byte 102,65,15,56,0,210 + movdqu %xmm2,(%rdi) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + .byte 0xf3,0xc3 +.Lgmult_seh_end: +.cfi_endproc +.size gcm_gmult_ssse3,.-gcm_gmult_ssse3 + + + + + +.type gcm_ghash_ssse3, @function +.globl gcm_ghash_ssse3 +.hidden gcm_ghash_ssse3 +.align 16 +gcm_ghash_ssse3: +.Lghash_seh_begin: +.cfi_startproc + movdqu (%rdi),%xmm0 + movdqa .Lreverse_bytes(%rip),%xmm10 + movdqa .Llow4_mask(%rip),%xmm11 + + + andq $-16,%rcx + + + +.byte 102,65,15,56,0,194 + + + pxor %xmm3,%xmm3 +.Loop_ghash: + + movdqu (%rdx),%xmm1 +.byte 102,65,15,56,0,202 + pxor %xmm1,%xmm0 + + + movdqa %xmm11,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm11,%xmm0 + + + + + pxor %xmm2,%xmm2 + + movq $5,%rax +.Loop_row_4: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa 
%xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_4 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $5,%rax +.Loop_row_5: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_5 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movq $6,%rax +.Loop_row_6: + movdqa (%rsi),%xmm4 + leaq 16(%rsi),%rsi + + + movdqa %xmm2,%xmm6 +.byte 102,15,58,15,243,1 + movdqa %xmm6,%xmm3 + psrldq $1,%xmm2 + + + + + movdqa %xmm4,%xmm5 +.byte 102,15,56,0,224 +.byte 102,15,56,0,233 + + + pxor %xmm5,%xmm2 + + + + movdqa %xmm4,%xmm5 + psllq $60,%xmm5 + movdqa %xmm5,%xmm6 + pslldq $8,%xmm6 + pxor %xmm6,%xmm3 + + + psrldq $8,%xmm5 + pxor %xmm5,%xmm2 + psrlq $4,%xmm4 + pxor %xmm4,%xmm2 + + subq $1,%rax + jnz .Loop_row_6 + + + + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $1,%xmm3 + pxor %xmm3,%xmm2 + psrlq $5,%xmm3 + pxor %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movdqa %xmm2,%xmm0 + + + leaq -256(%rsi),%rsi + + + leaq 16(%rdx),%rdx + subq $16,%rcx + jnz .Loop_ghash + + +.byte 102,65,15,56,0,194 + movdqu %xmm0,(%rdi) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + .byte 0xf3,0xc3 +.Lghash_seh_end: +.cfi_endproc +.size gcm_ghash_ssse3,.-gcm_ghash_ssse3 + +.align 16 + + +.Lreverse_bytes: +.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.Llow4_mask: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +#endif diff --git a/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S b/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S index ed0946d9..0b36afac 100644 --- a/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S +++ b/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S @@ -722,6 +722,7 @@ gcm_ghash_4bit: .type gcm_init_clmul,@function .align 16 gcm_init_clmul: +.cfi_startproc .L_init_clmul: movdqu (%rsi),%xmm2 pshufd $78,%xmm2,%xmm2 @@ -873,12 +874,14 @@ gcm_init_clmul: .byte 102,15,58,15,227,8 movdqu %xmm4,80(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size gcm_init_clmul,.-gcm_init_clmul .globl gcm_gmult_clmul .hidden gcm_gmult_clmul .type gcm_gmult_clmul,@function .align 16 gcm_gmult_clmul: +.cfi_startproc .L_gmult_clmul: movdqu (%rdi),%xmm0 movdqa .Lbswap_mask(%rip),%xmm5 @@ -925,12 +928,14 @@ gcm_gmult_clmul: .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size gcm_gmult_clmul,.-gcm_gmult_clmul .globl gcm_ghash_clmul .hidden gcm_ghash_clmul .type gcm_ghash_clmul,@function .align 32 gcm_ghash_clmul: +.cfi_startproc .L_ghash_clmul: movdqa .Lbswap_mask(%rip),%xmm10 @@ -1310,12 +1315,14 @@ gcm_ghash_clmul: .byte 102,65,15,56,0,194 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size gcm_ghash_clmul,.-gcm_ghash_clmul .globl gcm_init_avx .hidden gcm_init_avx .type gcm_init_avx,@function 
.align 32 gcm_init_avx: +.cfi_startproc vzeroupper vmovdqu (%rsi),%xmm2 @@ -1418,19 +1425,23 @@ gcm_init_avx: vzeroupper .byte 0xf3,0xc3 +.cfi_endproc .size gcm_init_avx,.-gcm_init_avx .globl gcm_gmult_avx .hidden gcm_gmult_avx .type gcm_gmult_avx,@function .align 32 gcm_gmult_avx: +.cfi_startproc jmp .L_gmult_clmul +.cfi_endproc .size gcm_gmult_avx,.-gcm_gmult_avx .globl gcm_ghash_avx .hidden gcm_ghash_avx .type gcm_ghash_avx,@function .align 32 gcm_ghash_avx: +.cfi_startproc vzeroupper vmovdqu (%rdi),%xmm10 @@ -1802,6 +1813,7 @@ gcm_ghash_avx: vmovdqu %xmm10,(%rdi) vzeroupper .byte 0xf3,0xc3 +.cfi_endproc .size gcm_ghash_avx,.-gcm_ghash_avx .align 64 .Lbswap_mask: diff --git a/linux-x86_64/crypto/fipsmodule/md5-x86_64.S b/linux-x86_64/crypto/fipsmodule/md5-x86_64.S index 6d08f173..18e2e928 100644 --- a/linux-x86_64/crypto/fipsmodule/md5-x86_64.S +++ b/linux-x86_64/crypto/fipsmodule/md5-x86_64.S @@ -18,11 +18,22 @@ .hidden md5_block_asm_data_order .type md5_block_asm_data_order,@function md5_block_asm_data_order: +.cfi_startproc pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset rbp,-16 pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset rbx,-24 pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12,-32 pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset r14,-40 pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset r15,-48 .Lprologue: @@ -672,12 +683,19 @@ md5_block_asm_data_order: movl %edx,12(%rbp) movq (%rsp),%r15 +.cfi_restore r15 movq 8(%rsp),%r14 +.cfi_restore r14 movq 16(%rsp),%r12 +.cfi_restore r12 movq 24(%rsp),%rbx +.cfi_restore rbx movq 32(%rsp),%rbp +.cfi_restore rbp addq $40,%rsp +.cfi_adjust_cfa_offset -40 .Lepilogue: .byte 0xf3,0xc3 +.cfi_endproc .size md5_block_asm_data_order,.-md5_block_asm_data_order #endif diff --git a/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S b/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S index 6a3cb1c6..3a575228 100644 --- a/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S +++ b/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S @@ -1267,6 +1267,7 @@ ecp_nistz256_mul_mont: .type __ecp_nistz256_mul_montq,@function .align 32 __ecp_nistz256_mul_montq: +.cfi_startproc movq %rax,%rbp @@ -1478,6 +1479,7 @@ __ecp_nistz256_mul_montq: movq %r9,24(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq @@ -1557,6 +1559,7 @@ ecp_nistz256_sqr_mont: .type __ecp_nistz256_sqr_montq,@function .align 32 __ecp_nistz256_sqr_montq: +.cfi_startproc movq %rax,%r13 mulq %r14 movq %rax,%r9 @@ -1714,10 +1717,12 @@ __ecp_nistz256_sqr_montq: movq %r15,24(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq .type __ecp_nistz256_mul_montx,@function .align 32 __ecp_nistz256_mul_montx: +.cfi_startproc mulxq %r9,%r8,%r9 @@ -1880,11 +1885,13 @@ __ecp_nistz256_mul_montx: movq %r9,24(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx .type __ecp_nistz256_sqr_montx,@function .align 32 __ecp_nistz256_sqr_montx: +.cfi_startproc mulxq %r14,%r9,%r10 mulxq %r15,%rcx,%r11 xorl %eax,%eax @@ -2008,6 +2015,7 @@ __ecp_nistz256_sqr_montx: movq %r15,24(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx @@ -2016,6 +2024,7 @@ __ecp_nistz256_sqr_montx: .type ecp_nistz256_select_w5,@function .align 32 ecp_nistz256_select_w5: +.cfi_startproc leaq OPENSSL_ia32cap_P(%rip),%rax movq 8(%rax),%rax testl $32,%eax @@ -2071,6 +2080,7 @@ ecp_nistz256_select_w5: movdqu %xmm6,64(%rdi) movdqu %xmm7,80(%rdi) .byte 0xf3,0xc3 +.cfi_endproc 
.LSEH_end_ecp_nistz256_select_w5: .size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5 @@ -2081,6 +2091,7 @@ ecp_nistz256_select_w5: .type ecp_nistz256_select_w7,@function .align 32 ecp_nistz256_select_w7: +.cfi_startproc leaq OPENSSL_ia32cap_P(%rip),%rax movq 8(%rax),%rax testl $32,%eax @@ -2125,6 +2136,7 @@ ecp_nistz256_select_w7: movdqu %xmm4,32(%rdi) movdqu %xmm5,48(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .LSEH_end_ecp_nistz256_select_w7: .size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7 @@ -2132,6 +2144,7 @@ ecp_nistz256_select_w7: .type ecp_nistz256_avx2_select_w5,@function .align 32 ecp_nistz256_avx2_select_w5: +.cfi_startproc .Lavx2_select_w5: vzeroupper vmovdqa .LTwo(%rip),%ymm0 @@ -2186,6 +2199,7 @@ ecp_nistz256_avx2_select_w5: vmovdqu %ymm4,64(%rdi) vzeroupper .byte 0xf3,0xc3 +.cfi_endproc .LSEH_end_ecp_nistz256_avx2_select_w5: .size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5 @@ -2196,6 +2210,7 @@ ecp_nistz256_avx2_select_w5: .type ecp_nistz256_avx2_select_w7,@function .align 32 ecp_nistz256_avx2_select_w7: +.cfi_startproc .Lavx2_select_w7: vzeroupper vmovdqa .LThree(%rip),%ymm0 @@ -2265,11 +2280,13 @@ ecp_nistz256_avx2_select_w7: vmovdqu %ymm3,32(%rdi) vzeroupper .byte 0xf3,0xc3 +.cfi_endproc .LSEH_end_ecp_nistz256_avx2_select_w7: .size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 .type __ecp_nistz256_add_toq,@function .align 32 __ecp_nistz256_add_toq: +.cfi_startproc xorq %r11,%r11 addq 0(%rbx),%r12 adcq 8(%rbx),%r13 @@ -2297,11 +2314,13 @@ __ecp_nistz256_add_toq: movq %r9,24(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq .type __ecp_nistz256_sub_fromq,@function .align 32 __ecp_nistz256_sub_fromq: +.cfi_startproc subq 0(%rbx),%r12 sbbq 8(%rbx),%r13 movq %r12,%rax @@ -2328,11 +2347,13 @@ __ecp_nistz256_sub_fromq: movq %r9,24(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq .type __ecp_nistz256_subq,@function .align 32 __ecp_nistz256_subq: +.cfi_startproc subq %r12,%rax sbbq %r13,%rbp movq %rax,%r12 @@ -2355,11 +2376,13 @@ __ecp_nistz256_subq: cmovnzq %r10,%r9 .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_subq,.-__ecp_nistz256_subq .type __ecp_nistz256_mul_by_2q,@function .align 32 __ecp_nistz256_mul_by_2q: +.cfi_startproc xorq %r11,%r11 addq %r12,%r12 adcq %r13,%r13 @@ -2387,6 +2410,7 @@ __ecp_nistz256_mul_by_2q: movq %r9,24(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q .globl ecp_nistz256_point_double .hidden ecp_nistz256_point_double @@ -2823,7 +2847,9 @@ ecp_nistz256_point_add: .byte 102,72,15,126,206 .byte 102,72,15,126,199 addq $416,%rsp +.cfi_adjust_cfa_offset -416 jmp .Lpoint_double_shortcutq +.cfi_adjust_cfa_offset 416 .align 32 .Ladd_proceedq: @@ -3387,6 +3413,7 @@ ecp_nistz256_point_add_affine: .type __ecp_nistz256_add_tox,@function .align 32 __ecp_nistz256_add_tox: +.cfi_startproc xorq %r11,%r11 adcq 0(%rbx),%r12 adcq 8(%rbx),%r13 @@ -3415,11 +3442,13 @@ __ecp_nistz256_add_tox: movq %r9,24(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox .type __ecp_nistz256_sub_fromx,@function .align 32 __ecp_nistz256_sub_fromx: +.cfi_startproc xorq %r11,%r11 sbbq 0(%rbx),%r12 sbbq 8(%rbx),%r13 @@ -3448,11 +3477,13 @@ __ecp_nistz256_sub_fromx: movq %r9,24(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx .type __ecp_nistz256_subx,@function .align 32 __ecp_nistz256_subx: +.cfi_startproc xorq %r11,%r11 sbbq %r12,%rax sbbq %r13,%rbp @@ 
-3477,11 +3508,13 @@ __ecp_nistz256_subx: cmovcq %r10,%r9 .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_subx,.-__ecp_nistz256_subx .type __ecp_nistz256_mul_by_2x,@function .align 32 __ecp_nistz256_mul_by_2x: +.cfi_startproc xorq %r11,%r11 adcq %r12,%r12 adcq %r13,%r13 @@ -3510,6 +3543,7 @@ __ecp_nistz256_mul_by_2x: movq %r9,24(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x .type ecp_nistz256_point_doublex,@function .align 32 @@ -3934,7 +3968,9 @@ ecp_nistz256_point_addx: .byte 102,72,15,126,206 .byte 102,72,15,126,199 addq $416,%rsp +.cfi_adjust_cfa_offset -416 jmp .Lpoint_double_shortcutx +.cfi_adjust_cfa_offset 416 .align 32 .Ladd_proceedx: diff --git a/linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S b/linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S index 98a2f8f4..5dfecc85 100644 --- a/linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S +++ b/linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S @@ -23,23 +23,27 @@ beeu_mod_inverse_vartime: pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset rbp,-16 - movq %rsp,%rbp -.cfi_def_cfa_register rbp - pushq %r12 +.cfi_adjust_cfa_offset 8 .cfi_offset r12,-24 pushq %r13 +.cfi_adjust_cfa_offset 8 .cfi_offset r13,-32 pushq %r14 +.cfi_adjust_cfa_offset 8 .cfi_offset r14,-40 pushq %r15 +.cfi_adjust_cfa_offset 8 .cfi_offset r15,-48 pushq %rbx +.cfi_adjust_cfa_offset 8 .cfi_offset rbx,-56 pushq %rsi +.cfi_adjust_cfa_offset 8 .cfi_offset rsi,-64 subq $80,%rsp +.cfi_adjust_cfa_offset 80 movq %rdi,0(%rsp) @@ -309,23 +313,30 @@ beeu_mod_inverse_vartime: .Lbeeu_finish: addq $80,%rsp +.cfi_adjust_cfa_offset -80 popq %rsi +.cfi_adjust_cfa_offset -8 .cfi_restore rsi popq %rbx +.cfi_adjust_cfa_offset -8 .cfi_restore rbx popq %r15 +.cfi_adjust_cfa_offset -8 .cfi_restore r15 popq %r14 +.cfi_adjust_cfa_offset -8 .cfi_restore r14 popq %r13 +.cfi_adjust_cfa_offset -8 .cfi_restore r13 popq %r12 +.cfi_adjust_cfa_offset -8 .cfi_restore r12 popq %rbp +.cfi_adjust_cfa_offset -8 .cfi_restore rbp -.cfi_def_cfa rsp, 8 -.cfi_endproc .byte 0xf3,0xc3 +.cfi_endproc .size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime #endif diff --git a/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S b/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S index d7b0cb4b..fefccd6f 100644 --- a/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S +++ b/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S @@ -23,14 +23,13 @@ CRYPTO_rdrand: .cfi_startproc xorq %rax,%rax - - -.byte 0x48, 0x0f, 0xc7, 0xf1 +.byte 72,15,199,242 adcq %rax,%rax - movq %rcx,0(%rdi) + movq %rdx,0(%rdi) .byte 0xf3,0xc3 .cfi_endproc +.size CRYPTO_rdrand,.-CRYPTO_rdrand @@ -46,9 +45,7 @@ CRYPTO_rdrand_multiple8_buf: jz .Lout movq $8,%rdx .Lloop: - - -.byte 0x48, 0x0f, 0xc7, 0xf1 +.byte 72,15,199,241 jnc .Lerr movq %rcx,0(%rdi) addq %rdx,%rdi @@ -61,4 +58,5 @@ CRYPTO_rdrand_multiple8_buf: xorq %rax,%rax .byte 0xf3,0xc3 .cfi_endproc +.size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf #endif diff --git a/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S b/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S index 4ca2cab0..579c7055 100644 --- a/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S +++ b/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S @@ -1228,6 +1228,7 @@ rsaz_1024_mul_avx2: .type rsaz_1024_red2norm_avx2,@function .align 32 rsaz_1024_red2norm_avx2: +.cfi_startproc subq $-128,%rsi xorq %rax,%rax movq -128(%rsi),%r8 @@ -1419,6 +1420,7 @@ rsaz_1024_red2norm_avx2: movq %rax,120(%rdi) movq %r11,%rax .byte 0xf3,0xc3 +.cfi_endproc .size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2 
.globl rsaz_1024_norm2red_avx2 @@ -1426,6 +1428,7 @@ rsaz_1024_red2norm_avx2: .type rsaz_1024_norm2red_avx2,@function .align 32 rsaz_1024_norm2red_avx2: +.cfi_startproc subq $-128,%rdi movq (%rsi),%r8 movl $0x1fffffff,%eax @@ -1578,12 +1581,14 @@ rsaz_1024_norm2red_avx2: movq %r8,176(%rdi) movq %r8,184(%rdi) .byte 0xf3,0xc3 +.cfi_endproc .size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2 .globl rsaz_1024_scatter5_avx2 .hidden rsaz_1024_scatter5_avx2 .type rsaz_1024_scatter5_avx2,@function .align 32 rsaz_1024_scatter5_avx2: +.cfi_startproc vzeroupper vmovdqu .Lscatter_permd(%rip),%ymm5 shll $4,%edx @@ -1603,6 +1608,7 @@ rsaz_1024_scatter5_avx2: vzeroupper .byte 0xf3,0xc3 +.cfi_endproc .size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2 .globl rsaz_1024_gather5_avx2 @@ -1727,25 +1733,6 @@ rsaz_1024_gather5_avx2: .cfi_endproc .LSEH_end_rsaz_1024_gather5: .size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 -.extern OPENSSL_ia32cap_P -.hidden OPENSSL_ia32cap_P -.globl rsaz_avx2_eligible -.hidden rsaz_avx2_eligible -.type rsaz_avx2_eligible,@function -.align 32 -rsaz_avx2_eligible: - leaq OPENSSL_ia32cap_P(%rip),%rax - movl 8(%rax),%eax - movl $524544,%ecx - movl $0,%edx - andl %eax,%ecx - cmpl $524544,%ecx - cmovel %edx,%eax - andl $32,%eax - shrl $5,%eax - .byte 0xf3,0xc3 -.size rsaz_avx2_eligible,.-rsaz_avx2_eligible - .align 64 .Land_mask: .quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff diff --git a/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S b/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S index 41b113a3..4355438e 100644 --- a/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S +++ b/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S @@ -31,6 +31,7 @@ .type _vpaes_encrypt_core,@function .align 16 _vpaes_encrypt_core: +.cfi_startproc movq %rdx,%r9 movq $16,%r11 movl 240(%rdx),%eax @@ -111,6 +112,7 @@ _vpaes_encrypt_core: pxor %xmm4,%xmm0 .byte 102,15,56,0,193 .byte 0xf3,0xc3 +.cfi_endproc .size _vpaes_encrypt_core,.-_vpaes_encrypt_core @@ -121,6 +123,7 @@ _vpaes_encrypt_core: .type _vpaes_decrypt_core,@function .align 16 _vpaes_decrypt_core: +.cfi_startproc movq %rdx,%r9 movl 240(%rdx),%eax movdqa %xmm9,%xmm1 @@ -217,6 +220,7 @@ _vpaes_decrypt_core: pxor %xmm4,%xmm0 .byte 102,15,56,0,194 .byte 0xf3,0xc3 +.cfi_endproc .size _vpaes_decrypt_core,.-_vpaes_decrypt_core @@ -227,6 +231,7 @@ _vpaes_decrypt_core: .type _vpaes_schedule_core,@function .align 16 _vpaes_schedule_core: +.cfi_startproc @@ -393,6 +398,7 @@ _vpaes_schedule_core: pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 .byte 0xf3,0xc3 +.cfi_endproc .size _vpaes_schedule_core,.-_vpaes_schedule_core @@ -412,6 +418,7 @@ _vpaes_schedule_core: .type _vpaes_schedule_192_smear,@function .align 16 _vpaes_schedule_192_smear: +.cfi_startproc pshufd $0x80,%xmm6,%xmm1 pshufd $0xFE,%xmm7,%xmm0 pxor %xmm1,%xmm6 @@ -420,6 +427,7 @@ _vpaes_schedule_192_smear: movdqa %xmm6,%xmm0 movhlps %xmm1,%xmm6 .byte 0xf3,0xc3 +.cfi_endproc .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear @@ -443,6 +451,7 @@ _vpaes_schedule_192_smear: .type _vpaes_schedule_round,@function .align 16 _vpaes_schedule_round: +.cfi_startproc pxor %xmm1,%xmm1 .byte 102,65,15,58,15,200,15 @@ -496,6 +505,7 @@ _vpaes_schedule_low_round: pxor %xmm7,%xmm0 movdqa %xmm0,%xmm7 .byte 0xf3,0xc3 +.cfi_endproc .size _vpaes_schedule_round,.-_vpaes_schedule_round @@ -510,6 +520,7 @@ _vpaes_schedule_low_round: .type _vpaes_schedule_transform,@function .align 16 _vpaes_schedule_transform: +.cfi_startproc movdqa %xmm9,%xmm1 pandn %xmm0,%xmm1 psrld $4,%xmm1 @@ -520,6 +531,7 @@ _vpaes_schedule_transform: 
.byte 102,15,56,0,193 pxor %xmm2,%xmm0 .byte 0xf3,0xc3 +.cfi_endproc .size _vpaes_schedule_transform,.-_vpaes_schedule_transform @@ -548,6 +560,7 @@ _vpaes_schedule_transform: .type _vpaes_schedule_mangle,@function .align 16 _vpaes_schedule_mangle: +.cfi_startproc movdqa %xmm0,%xmm4 movdqa .Lk_mc_forward(%rip),%xmm5 testq %rcx,%rcx @@ -612,6 +625,7 @@ _vpaes_schedule_mangle: andq $0x30,%r8 movdqu %xmm3,(%rdx) .byte 0xf3,0xc3 +.cfi_endproc .size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle @@ -622,6 +636,15 @@ _vpaes_schedule_mangle: .type vpaes_set_encrypt_key,@function .align 16 vpaes_set_encrypt_key: +.cfi_startproc +#ifndef NDEBUG +#ifndef BORINGSSL_FIPS +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+5(%rip) +#endif +#endif + movl %esi,%eax shrl $5,%eax addl $5,%eax @@ -632,6 +655,7 @@ vpaes_set_encrypt_key: call _vpaes_schedule_core xorl %eax,%eax .byte 0xf3,0xc3 +.cfi_endproc .size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key .globl vpaes_set_decrypt_key @@ -639,6 +663,7 @@ vpaes_set_encrypt_key: .type vpaes_set_decrypt_key,@function .align 16 vpaes_set_decrypt_key: +.cfi_startproc movl %esi,%eax shrl $5,%eax addl $5,%eax @@ -654,6 +679,7 @@ vpaes_set_decrypt_key: call _vpaes_schedule_core xorl %eax,%eax .byte 0xf3,0xc3 +.cfi_endproc .size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key .globl vpaes_encrypt @@ -661,11 +687,20 @@ vpaes_set_decrypt_key: .type vpaes_encrypt,@function .align 16 vpaes_encrypt: +.cfi_startproc +#ifndef NDEBUG +#ifndef BORINGSSL_FIPS +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+4(%rip) +#endif +#endif movdqu (%rdi),%xmm0 call _vpaes_preheat call _vpaes_encrypt_core movdqu %xmm0,(%rsi) .byte 0xf3,0xc3 +.cfi_endproc .size vpaes_encrypt,.-vpaes_encrypt .globl vpaes_decrypt @@ -673,17 +708,20 @@ vpaes_encrypt: .type vpaes_decrypt,@function .align 16 vpaes_decrypt: +.cfi_startproc movdqu (%rdi),%xmm0 call _vpaes_preheat call _vpaes_decrypt_core movdqu %xmm0,(%rsi) .byte 0xf3,0xc3 +.cfi_endproc .size vpaes_decrypt,.-vpaes_decrypt .globl vpaes_cbc_encrypt .hidden vpaes_cbc_encrypt .type vpaes_cbc_encrypt,@function .align 16 vpaes_cbc_encrypt: +.cfi_startproc xchgq %rcx,%rdx subq $16,%rcx jc .Lcbc_abort @@ -719,6 +757,7 @@ vpaes_cbc_encrypt: movdqu %xmm6,(%r8) .Lcbc_abort: .byte 0xf3,0xc3 +.cfi_endproc .size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt @@ -729,6 +768,7 @@ vpaes_cbc_encrypt: .type _vpaes_preheat,@function .align 16 _vpaes_preheat: +.cfi_startproc leaq .Lk_s0F(%rip),%r10 movdqa -32(%r10),%xmm10 movdqa -16(%r10),%xmm11 @@ -738,6 +778,7 @@ _vpaes_preheat: movdqa 80(%r10),%xmm15 movdqa 96(%r10),%xmm14 .byte 0xf3,0xc3 +.cfi_endproc .size _vpaes_preheat,.-_vpaes_preheat diff --git a/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S b/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S index 8ac360dd..b12393e2 100644 --- a/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S +++ b/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S @@ -566,6 +566,7 @@ bn_mul4x_mont_gather5: .type mul4x_internal,@function .align 32 mul4x_internal: +.cfi_startproc shlq $5,%r9 movd 8(%rax),%xmm5 leaq .Linc(%rip),%rax @@ -1087,6 +1088,7 @@ mul4x_internal: movq 16(%rbp),%r14 movq 24(%rbp),%r15 jmp .Lsqr4x_sub_entry +.cfi_endproc .size mul4x_internal,.-mul4x_internal .globl bn_power5 .hidden bn_power5 @@ -1232,6 +1234,7 @@ bn_power5: .align 32 bn_sqr8x_internal: __bn_sqr8x_internal: +.cfi_startproc @@ -2006,10 +2009,12 @@ __bn_sqr8x_reduction: cmpq %rdx,%rdi jb .L8x_reduction_loop .byte 0xf3,0xc3 
+.cfi_endproc .size bn_sqr8x_internal,.-bn_sqr8x_internal .type __bn_post4x_internal,@function .align 32 __bn_post4x_internal: +.cfi_startproc movq 0(%rbp),%r12 leaq (%rdi,%r9,1),%rbx movq %r9,%rcx @@ -2060,16 +2065,19 @@ __bn_post4x_internal: movq %r9,%r10 negq %r9 .byte 0xf3,0xc3 +.cfi_endproc .size __bn_post4x_internal,.-__bn_post4x_internal .globl bn_from_montgomery .hidden bn_from_montgomery .type bn_from_montgomery,@function .align 32 bn_from_montgomery: +.cfi_startproc testl $7,%r9d jz bn_from_mont8x xorl %eax,%eax .byte 0xf3,0xc3 +.cfi_endproc .size bn_from_montgomery,.-bn_from_montgomery .type bn_from_mont8x,@function @@ -2354,6 +2362,7 @@ bn_mulx4x_mont_gather5: .type mulx4x_internal,@function .align 32 mulx4x_internal: +.cfi_startproc movq %r9,8(%rsp) movq %r9,%r10 negq %r9 @@ -2772,6 +2781,7 @@ mulx4x_internal: movq 16(%rbp),%r14 movq 24(%rbp),%r15 jmp .Lsqrx4x_sub_entry +.cfi_endproc .size mulx4x_internal,.-mulx4x_internal .type bn_powerx5,@function .align 32 @@ -2915,6 +2925,7 @@ bn_powerx5: .align 32 bn_sqrx8x_internal: __bn_sqrx8x_internal: +.cfi_startproc @@ -3526,9 +3537,12 @@ __bn_sqrx8x_reduction: cmpq 8+8(%rsp),%r8 jb .Lsqrx8x_reduction_loop .byte 0xf3,0xc3 +.cfi_endproc .size bn_sqrx8x_internal,.-bn_sqrx8x_internal .align 32 +.type __bn_postx4x_internal,@function __bn_postx4x_internal: +.cfi_startproc movq 0(%rbp),%r12 movq %rcx,%r10 movq %rcx,%r9 @@ -3576,12 +3590,14 @@ __bn_postx4x_internal: negq %r9 .byte 0xf3,0xc3 +.cfi_endproc .size __bn_postx4x_internal,.-__bn_postx4x_internal .globl bn_scatter5 .hidden bn_scatter5 .type bn_scatter5,@function .align 16 bn_scatter5: +.cfi_startproc cmpl $0,%esi jz .Lscatter_epilogue leaq (%rdx,%rcx,8),%rdx @@ -3594,6 +3610,7 @@ bn_scatter5: jnz .Lscatter .Lscatter_epilogue: .byte 0xf3,0xc3 +.cfi_endproc .size bn_scatter5,.-bn_scatter5 .globl bn_gather5 @@ -3601,9 +3618,11 @@ bn_scatter5: .type bn_gather5,@function .align 32 bn_gather5: +.cfi_startproc .LSEH_begin_bn_gather5: .byte 0x4c,0x8d,0x14,0x24 +.cfi_def_cfa_register %r10 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 leaq .Linc(%rip),%rax andq $-16,%rsp @@ -3757,8 +3776,10 @@ bn_gather5: jnz .Lgather leaq (%r10),%rsp +.cfi_def_cfa_register %rsp .byte 0xf3,0xc3 .LSEH_end_bn_gather5: +.cfi_endproc .size bn_gather5,.-bn_gather5 .align 64 .Linc: diff --git a/linux-x86_64/crypto/test/trampoline-x86_64.S b/linux-x86_64/crypto/test/trampoline-x86_64.S new file mode 100644 index 00000000..91a13f3e --- /dev/null +++ b/linux-x86_64/crypto/test/trampoline-x86_64.S @@ -0,0 +1,517 @@ +# This file is generated from a similarly-named Perl script in the BoringSSL +# source tree. Do not edit by hand. 
+ +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +#define OPENSSL_NO_ASM +#endif +#endif + +#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +#if defined(BORINGSSL_PREFIX) +#include <boringssl_prefix_symbols_asm.h> +#endif +.text + + + + + + + + +.type abi_test_trampoline, @function +.globl abi_test_trampoline +.hidden abi_test_trampoline +.align 16 +abi_test_trampoline: +.Labi_test_trampoline_seh_begin: +.cfi_startproc + + + + + + + + + + subq $120,%rsp +.cfi_adjust_cfa_offset 120 +.Labi_test_trampoline_seh_prolog_alloc: + movq %r8,48(%rsp) + movq %rbx,64(%rsp) +.cfi_offset rbx, -64 +.Labi_test_trampoline_seh_prolog_rbx: + movq %rbp,72(%rsp) +.cfi_offset rbp, -56 +.Labi_test_trampoline_seh_prolog_rbp: + movq %r12,80(%rsp) +.cfi_offset r12, -48 +.Labi_test_trampoline_seh_prolog_r12: + movq %r13,88(%rsp) +.cfi_offset r13, -40 +.Labi_test_trampoline_seh_prolog_r13: + movq %r14,96(%rsp) +.cfi_offset r14, -32 +.Labi_test_trampoline_seh_prolog_r14: + movq %r15,104(%rsp) +.cfi_offset r15, -24 +.Labi_test_trampoline_seh_prolog_r15: +.Labi_test_trampoline_seh_prolog_end: + movq 0(%rsi),%rbx + movq 8(%rsi),%rbp + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + movq 32(%rsi),%r14 + movq 40(%rsi),%r15 + + movq %rdi,32(%rsp) + movq %rsi,40(%rsp) + + + + + movq %rdx,%r10 + movq %rcx,%r11 + decq %r11 + js .Largs_done + movq (%r10),%rdi + addq $8,%r10 + decq %r11 + js .Largs_done + movq (%r10),%rsi + addq $8,%r10 + decq %r11 + js .Largs_done + movq (%r10),%rdx + addq $8,%r10 + decq %r11 + js .Largs_done + movq (%r10),%rcx + addq $8,%r10 + decq %r11 + js .Largs_done + movq (%r10),%r8 + addq $8,%r10 + decq %r11 + js .Largs_done + movq (%r10),%r9 + addq $8,%r10 + leaq 0(%rsp),%rax +.Largs_loop: + decq %r11 + js .Largs_done + + + + + + + movq %r11,56(%rsp) + movq (%r10),%r11 + movq %r11,(%rax) + movq 56(%rsp),%r11 + + addq $8,%r10 + addq $8,%rax + jmp .Largs_loop + +.Largs_done: + movq 32(%rsp),%rax + movq 48(%rsp),%r10 + testq %r10,%r10 + jz .Lno_unwind + + + pushfq + orq $0x100,0(%rsp) + popfq + + + + nop +.globl abi_test_unwind_start +.hidden abi_test_unwind_start +abi_test_unwind_start: + + call *%rax +.globl abi_test_unwind_return +.hidden abi_test_unwind_return +abi_test_unwind_return: + + + + + pushfq + andq $-0x101,0(%rsp) + popfq +.globl abi_test_unwind_stop +.hidden abi_test_unwind_stop +abi_test_unwind_stop: + + jmp .Lcall_done + +.Lno_unwind: + call *%rax + +.Lcall_done: + + movq 40(%rsp),%rsi + movq %rbx,0(%rsi) + movq %rbp,8(%rsi) + movq %r12,16(%rsi) + movq %r13,24(%rsi) + movq %r14,32(%rsi) + movq %r15,40(%rsi) + movq 64(%rsp),%rbx +.cfi_restore rbx + movq 72(%rsp),%rbp +.cfi_restore rbp + movq 80(%rsp),%r12 +.cfi_restore r12 + movq 88(%rsp),%r13 +.cfi_restore r13 + movq 96(%rsp),%r14 +.cfi_restore r14 + movq 104(%rsp),%r15 +.cfi_restore r15 + addq $120,%rsp +.cfi_adjust_cfa_offset -120 + + + .byte 0xf3,0xc3 +.cfi_endproc +.Labi_test_trampoline_seh_end: +.size abi_test_trampoline,.-abi_test_trampoline +.type abi_test_clobber_rax, @function +.globl abi_test_clobber_rax +.hidden abi_test_clobber_rax +.align 16 +abi_test_clobber_rax: + xorq %rax,%rax + .byte 0xf3,0xc3 +.size abi_test_clobber_rax,.-abi_test_clobber_rax +.type abi_test_clobber_rbx, @function +.globl abi_test_clobber_rbx +.hidden abi_test_clobber_rbx +.align 16 +abi_test_clobber_rbx: + xorq %rbx,%rbx + .byte 0xf3,0xc3 +.size abi_test_clobber_rbx,.-abi_test_clobber_rbx +.type abi_test_clobber_rcx, @function +.globl abi_test_clobber_rcx +.hidden abi_test_clobber_rcx +.align 16 
+abi_test_clobber_rcx: + xorq %rcx,%rcx + .byte 0xf3,0xc3 +.size abi_test_clobber_rcx,.-abi_test_clobber_rcx +.type abi_test_clobber_rdx, @function +.globl abi_test_clobber_rdx +.hidden abi_test_clobber_rdx +.align 16 +abi_test_clobber_rdx: + xorq %rdx,%rdx + .byte 0xf3,0xc3 +.size abi_test_clobber_rdx,.-abi_test_clobber_rdx +.type abi_test_clobber_rdi, @function +.globl abi_test_clobber_rdi +.hidden abi_test_clobber_rdi +.align 16 +abi_test_clobber_rdi: + xorq %rdi,%rdi + .byte 0xf3,0xc3 +.size abi_test_clobber_rdi,.-abi_test_clobber_rdi +.type abi_test_clobber_rsi, @function +.globl abi_test_clobber_rsi +.hidden abi_test_clobber_rsi +.align 16 +abi_test_clobber_rsi: + xorq %rsi,%rsi + .byte 0xf3,0xc3 +.size abi_test_clobber_rsi,.-abi_test_clobber_rsi +.type abi_test_clobber_rbp, @function +.globl abi_test_clobber_rbp +.hidden abi_test_clobber_rbp +.align 16 +abi_test_clobber_rbp: + xorq %rbp,%rbp + .byte 0xf3,0xc3 +.size abi_test_clobber_rbp,.-abi_test_clobber_rbp +.type abi_test_clobber_r8, @function +.globl abi_test_clobber_r8 +.hidden abi_test_clobber_r8 +.align 16 +abi_test_clobber_r8: + xorq %r8,%r8 + .byte 0xf3,0xc3 +.size abi_test_clobber_r8,.-abi_test_clobber_r8 +.type abi_test_clobber_r9, @function +.globl abi_test_clobber_r9 +.hidden abi_test_clobber_r9 +.align 16 +abi_test_clobber_r9: + xorq %r9,%r9 + .byte 0xf3,0xc3 +.size abi_test_clobber_r9,.-abi_test_clobber_r9 +.type abi_test_clobber_r10, @function +.globl abi_test_clobber_r10 +.hidden abi_test_clobber_r10 +.align 16 +abi_test_clobber_r10: + xorq %r10,%r10 + .byte 0xf3,0xc3 +.size abi_test_clobber_r10,.-abi_test_clobber_r10 +.type abi_test_clobber_r11, @function +.globl abi_test_clobber_r11 +.hidden abi_test_clobber_r11 +.align 16 +abi_test_clobber_r11: + xorq %r11,%r11 + .byte 0xf3,0xc3 +.size abi_test_clobber_r11,.-abi_test_clobber_r11 +.type abi_test_clobber_r12, @function +.globl abi_test_clobber_r12 +.hidden abi_test_clobber_r12 +.align 16 +abi_test_clobber_r12: + xorq %r12,%r12 + .byte 0xf3,0xc3 +.size abi_test_clobber_r12,.-abi_test_clobber_r12 +.type abi_test_clobber_r13, @function +.globl abi_test_clobber_r13 +.hidden abi_test_clobber_r13 +.align 16 +abi_test_clobber_r13: + xorq %r13,%r13 + .byte 0xf3,0xc3 +.size abi_test_clobber_r13,.-abi_test_clobber_r13 +.type abi_test_clobber_r14, @function +.globl abi_test_clobber_r14 +.hidden abi_test_clobber_r14 +.align 16 +abi_test_clobber_r14: + xorq %r14,%r14 + .byte 0xf3,0xc3 +.size abi_test_clobber_r14,.-abi_test_clobber_r14 +.type abi_test_clobber_r15, @function +.globl abi_test_clobber_r15 +.hidden abi_test_clobber_r15 +.align 16 +abi_test_clobber_r15: + xorq %r15,%r15 + .byte 0xf3,0xc3 +.size abi_test_clobber_r15,.-abi_test_clobber_r15 +.type abi_test_clobber_xmm0, @function +.globl abi_test_clobber_xmm0 +.hidden abi_test_clobber_xmm0 +.align 16 +abi_test_clobber_xmm0: + pxor %xmm0,%xmm0 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm0,.-abi_test_clobber_xmm0 +.type abi_test_clobber_xmm1, @function +.globl abi_test_clobber_xmm1 +.hidden abi_test_clobber_xmm1 +.align 16 +abi_test_clobber_xmm1: + pxor %xmm1,%xmm1 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm1,.-abi_test_clobber_xmm1 +.type abi_test_clobber_xmm2, @function +.globl abi_test_clobber_xmm2 +.hidden abi_test_clobber_xmm2 +.align 16 +abi_test_clobber_xmm2: + pxor %xmm2,%xmm2 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm2,.-abi_test_clobber_xmm2 +.type abi_test_clobber_xmm3, @function +.globl abi_test_clobber_xmm3 +.hidden abi_test_clobber_xmm3 +.align 16 +abi_test_clobber_xmm3: + pxor %xmm3,%xmm3 + .byte 
0xf3,0xc3 +.size abi_test_clobber_xmm3,.-abi_test_clobber_xmm3 +.type abi_test_clobber_xmm4, @function +.globl abi_test_clobber_xmm4 +.hidden abi_test_clobber_xmm4 +.align 16 +abi_test_clobber_xmm4: + pxor %xmm4,%xmm4 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm4,.-abi_test_clobber_xmm4 +.type abi_test_clobber_xmm5, @function +.globl abi_test_clobber_xmm5 +.hidden abi_test_clobber_xmm5 +.align 16 +abi_test_clobber_xmm5: + pxor %xmm5,%xmm5 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm5,.-abi_test_clobber_xmm5 +.type abi_test_clobber_xmm6, @function +.globl abi_test_clobber_xmm6 +.hidden abi_test_clobber_xmm6 +.align 16 +abi_test_clobber_xmm6: + pxor %xmm6,%xmm6 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm6,.-abi_test_clobber_xmm6 +.type abi_test_clobber_xmm7, @function +.globl abi_test_clobber_xmm7 +.hidden abi_test_clobber_xmm7 +.align 16 +abi_test_clobber_xmm7: + pxor %xmm7,%xmm7 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm7,.-abi_test_clobber_xmm7 +.type abi_test_clobber_xmm8, @function +.globl abi_test_clobber_xmm8 +.hidden abi_test_clobber_xmm8 +.align 16 +abi_test_clobber_xmm8: + pxor %xmm8,%xmm8 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm8,.-abi_test_clobber_xmm8 +.type abi_test_clobber_xmm9, @function +.globl abi_test_clobber_xmm9 +.hidden abi_test_clobber_xmm9 +.align 16 +abi_test_clobber_xmm9: + pxor %xmm9,%xmm9 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm9,.-abi_test_clobber_xmm9 +.type abi_test_clobber_xmm10, @function +.globl abi_test_clobber_xmm10 +.hidden abi_test_clobber_xmm10 +.align 16 +abi_test_clobber_xmm10: + pxor %xmm10,%xmm10 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm10,.-abi_test_clobber_xmm10 +.type abi_test_clobber_xmm11, @function +.globl abi_test_clobber_xmm11 +.hidden abi_test_clobber_xmm11 +.align 16 +abi_test_clobber_xmm11: + pxor %xmm11,%xmm11 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm11,.-abi_test_clobber_xmm11 +.type abi_test_clobber_xmm12, @function +.globl abi_test_clobber_xmm12 +.hidden abi_test_clobber_xmm12 +.align 16 +abi_test_clobber_xmm12: + pxor %xmm12,%xmm12 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm12,.-abi_test_clobber_xmm12 +.type abi_test_clobber_xmm13, @function +.globl abi_test_clobber_xmm13 +.hidden abi_test_clobber_xmm13 +.align 16 +abi_test_clobber_xmm13: + pxor %xmm13,%xmm13 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm13,.-abi_test_clobber_xmm13 +.type abi_test_clobber_xmm14, @function +.globl abi_test_clobber_xmm14 +.hidden abi_test_clobber_xmm14 +.align 16 +abi_test_clobber_xmm14: + pxor %xmm14,%xmm14 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm14,.-abi_test_clobber_xmm14 +.type abi_test_clobber_xmm15, @function +.globl abi_test_clobber_xmm15 +.hidden abi_test_clobber_xmm15 +.align 16 +abi_test_clobber_xmm15: + pxor %xmm15,%xmm15 + .byte 0xf3,0xc3 +.size abi_test_clobber_xmm15,.-abi_test_clobber_xmm15 + + + +.type abi_test_bad_unwind_wrong_register, @function +.globl abi_test_bad_unwind_wrong_register +.hidden abi_test_bad_unwind_wrong_register +.align 16 +abi_test_bad_unwind_wrong_register: +.cfi_startproc +.Labi_test_bad_unwind_wrong_register_seh_begin: + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-16 +.Labi_test_bad_unwind_wrong_register_seh_push_r13: + + + + nop + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + .byte 0xf3,0xc3 +.Labi_test_bad_unwind_wrong_register_seh_end: +.cfi_endproc +.size abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register + + + + +.type abi_test_bad_unwind_temporary, @function +.globl abi_test_bad_unwind_temporary +.hidden 
abi_test_bad_unwind_temporary
+.align	16
+abi_test_bad_unwind_temporary:
+.cfi_startproc
+.Labi_test_bad_unwind_temporary_seh_begin:
+	pushq	%r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-16
+.Labi_test_bad_unwind_temporary_seh_push_r12:
+
+	movq	%r12,%rax
+	incq	%rax
+	movq	%rax,(%rsp)
+
+
+
+	movq	%r12,(%rsp)
+
+
+	popq	%r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r12
+	.byte	0xf3,0xc3
+.Labi_test_bad_unwind_temporary_seh_end:
+.cfi_endproc
+.size abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary
+
+
+
+
+.type abi_test_set_direction_flag, @function
+.globl abi_test_get_and_clear_direction_flag
+.hidden abi_test_get_and_clear_direction_flag
+abi_test_get_and_clear_direction_flag:
+	pushfq
+	popq	%rax
+	andq	$0x400,%rax
+	shrq	$10,%rax
+	cld
+	.byte	0xf3,0xc3
+.size abi_test_get_and_clear_direction_flag,.-abi_test_get_and_clear_direction_flag
+
+
+
+.type abi_test_set_direction_flag, @function
+.globl abi_test_set_direction_flag
+.hidden abi_test_set_direction_flag
+abi_test_set_direction_flag:
+	std
+	.byte	0xf3,0xc3
+.size abi_test_set_direction_flag,.-abi_test_set_direction_flag
+#endif