author     Robert Sloan <varomodt@google.com>  2019-03-01 15:53:37 -0800
committer  Robert Sloan <varomodt@google.com>  2019-03-01 16:02:19 -0800
commit     4c22c5fad19b2a554bcb056ca25ca4cc2ef6a45c (patch)
tree       fb0dc666dadcc955218e7a701ea482a9270d1a53 /linux-x86_64
parent     b6b07c32187eec60f4c9f27e0959c58d145f87ef (diff)
download   boringssl-4c22c5fad19b2a554bcb056ca25ca4cc2ef6a45c.tar.gz
external/boringssl: Sync to c3889634a1aa52575c5d26497696238208fbd0f5.
This includes the following changes:
https://boringssl.googlesource.com/boringssl/+log/41c10e2b5f37edce8b9f292f7f3bacb7e30e25c4..c3889634a1aa52575c5d26497696238208fbd0f5

Test: atest CtsLibcoreTestCases
Change-Id: Ia1c2941ccf58a9e0d736b3409a2d13c21603a205
Diffstat (limited to 'linux-x86_64')
-rw-r--r--  linux-x86_64/crypto/chacha/chacha-x86_64.S               34
-rw-r--r--  linux-x86_64/crypto/fipsmodule/aes-x86_64.S              16
-rw-r--r--  linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S         7
-rw-r--r--  linux-x86_64/crypto/fipsmodule/aesni-x86_64.S          2054
-rw-r--r--  linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S            13
-rw-r--r--  linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S     426
-rw-r--r--  linux-x86_64/crypto/fipsmodule/ghash-x86_64.S            12
-rw-r--r--  linux-x86_64/crypto/fipsmodule/md5-x86_64.S              18
-rw-r--r--  linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S         36
-rw-r--r--  linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S    21
-rw-r--r--  linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S           12
-rw-r--r--  linux-x86_64/crypto/fipsmodule/rsaz-avx2.S               25
-rw-r--r--  linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S            41
-rw-r--r--  linux-x86_64/crypto/fipsmodule/x86_64-mont5.S            21
-rw-r--r--  linux-x86_64/crypto/test/trampoline-x86_64.S            517
15 files changed, 1217 insertions, 2036 deletions
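
Most of the churn in this sync is mechanical: the generated assembly gains DWARF call-frame information (CFI) so unwinders and profilers can walk through these routines. Nearly every hunk below instantiates the same pattern; here is a minimal sketch of it with a hypothetical function name, not code from the patch:

.type	cfi_example,@function
cfi_example:
.cfi_startproc				# open the unwind table entry (FDE)
	pushq	%rbx
.cfi_adjust_cfa_offset 8		# the push moved %rsp down by 8
.cfi_offset rbx,-16			# saved %rbx now lives at CFA-16
	subq	$64,%rsp
.cfi_adjust_cfa_offset 64		# local frame grew by 64 bytes
	# ... function body ...
	addq	$64,%rsp
.cfi_adjust_cfa_offset -64
	popq	%rbx
.cfi_adjust_cfa_offset -8
.cfi_restore rbx			# %rbx holds the caller's value again
	.byte	0xf3,0xc3		# rep ret, encoded as raw bytes like the generated code
.cfi_endproc
.size	cfi_example,.-cfi_example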
diff --git a/linux-x86_64/crypto/chacha/chacha-x86_64.S b/linux-x86_64/crypto/chacha/chacha-x86_64.S
index 785b2dc4..4e2267bb 100644
--- a/linux-x86_64/crypto/chacha/chacha-x86_64.S
+++ b/linux-x86_64/crypto/chacha/chacha-x86_64.S
@@ -50,6 +50,7 @@
.type ChaCha20_ctr32,@function
.align 64
ChaCha20_ctr32:
+.cfi_startproc
cmpq $0,%rdx
je .Lno_data
movq OPENSSL_ia32cap_P+4(%rip),%r10
@@ -57,12 +58,25 @@ ChaCha20_ctr32:
jnz .LChaCha20_ssse3
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset r15,-56
subq $64+24,%rsp
+.cfi_adjust_cfa_offset 88
.Lctr32_body:
@@ -303,20 +317,30 @@ ChaCha20_ctr32:
.Ldone:
leaq 64+24+48(%rsp),%rsi
movq -48(%rsi),%r15
+.cfi_restore r15
movq -40(%rsi),%r14
+.cfi_restore r14
movq -32(%rsi),%r13
+.cfi_restore r13
movq -24(%rsi),%r12
+.cfi_restore r12
movq -16(%rsi),%rbp
+.cfi_restore rbp
movq -8(%rsi),%rbx
+.cfi_restore rbx
leaq (%rsi),%rsp
+.cfi_adjust_cfa_offset -136
.Lno_data:
.byte 0xf3,0xc3
+.cfi_endproc
.size ChaCha20_ctr32,.-ChaCha20_ctr32
.type ChaCha20_ssse3,@function
.align 32
ChaCha20_ssse3:
.LChaCha20_ssse3:
+.cfi_startproc
movq %rsp,%r9
+.cfi_def_cfa_register r9
cmpq $128,%rdx
ja .LChaCha20_4x
@@ -442,14 +466,18 @@ ChaCha20_ssse3:
.Ldone_ssse3:
leaq (%r9),%rsp
+.cfi_def_cfa_register rsp
.Lssse3_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ChaCha20_ssse3,.-ChaCha20_ssse3
.type ChaCha20_4x,@function
.align 32
ChaCha20_4x:
.LChaCha20_4x:
+.cfi_startproc
movq %rsp,%r9
+.cfi_def_cfa_register r9
movq %r10,%r11
shrq $32,%r10
testq $32,%r10
@@ -990,14 +1018,18 @@ ChaCha20_4x:
.Ldone4x:
leaq (%r9),%rsp
+.cfi_def_cfa_register rsp
.L4x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ChaCha20_4x,.-ChaCha20_4x
.type ChaCha20_8x,@function
.align 32
ChaCha20_8x:
.LChaCha20_8x:
+.cfi_startproc
movq %rsp,%r9
+.cfi_def_cfa_register r9
subq $0x280+8,%rsp
andq $-32,%rsp
vzeroupper
@@ -1592,7 +1624,9 @@ ChaCha20_8x:
.Ldone8x:
vzeroall
leaq (%r9),%rsp
+.cfi_def_cfa_register rsp
.L8x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ChaCha20_8x,.-ChaCha20_8x
#endif
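
The SSSE3/AVX ChaCha20 paths realign the stack, so %rsp stops being a usable frame base mid-function. Their hunks instead park the caller's %rsp in %r9 and retarget the CFA to that register, as in this sketch of the pattern from the hunks above:

	movq	%rsp,%r9		# keep the caller's stack pointer
.cfi_def_cfa_register r9		# compute the CFA from %r9 from here on
	subq	$0x280+8,%rsp
	andq	$-32,%rsp		# realign; %rsp no longer tracks the CFA
	# ... function body ...
	leaq	(%r9),%rsp		# restore the caller's frame in one step
.cfi_def_cfa_register rsp		# CFA bookkeeping returns to %rsp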
diff --git a/linux-x86_64/crypto/fipsmodule/aes-x86_64.S b/linux-x86_64/crypto/fipsmodule/aes-x86_64.S
index 0dca2617..f45e010e 100644
--- a/linux-x86_64/crypto/fipsmodule/aes-x86_64.S
+++ b/linux-x86_64/crypto/fipsmodule/aes-x86_64.S
@@ -168,6 +168,7 @@ _x86_64_AES_encrypt:
.type _x86_64_AES_encrypt_compact,@function
.align 16
_x86_64_AES_encrypt_compact:
+.cfi_startproc
leaq 128(%r14),%r8
movl 0-128(%r8),%edi
movl 32-128(%r8),%ebp
@@ -337,6 +338,7 @@ _x86_64_AES_encrypt_compact:
xorl 8(%r15),%ecx
xorl 12(%r15),%edx
.byte 0xf3,0xc3
+.cfi_endproc
.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
.align 16
.globl aes_nohw_encrypt
@@ -580,6 +582,7 @@ _x86_64_AES_decrypt:
.type _x86_64_AES_decrypt_compact,@function
.align 16
_x86_64_AES_decrypt_compact:
+.cfi_startproc
leaq 128(%r14),%r8
movl 0-128(%r8),%edi
movl 32-128(%r8),%ebp
@@ -801,6 +804,7 @@ _x86_64_AES_decrypt_compact:
xorl 8(%r15),%ecx
xorl 12(%r15),%edx
.byte 0xf3,0xc3
+.cfi_endproc
.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
.align 16
.globl aes_nohw_decrypt
@@ -932,6 +936,7 @@ aes_nohw_set_encrypt_key:
.type _x86_64_AES_set_encrypt_key,@function
.align 16
_x86_64_AES_set_encrypt_key:
+.cfi_startproc
movl %esi,%ecx
movq %rdi,%rsi
movq %rdx,%rdi
@@ -1167,6 +1172,7 @@ _x86_64_AES_set_encrypt_key:
movq $-1,%rax
.Lexit:
.byte 0xf3,0xc3
+.cfi_endproc
.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
.align 16
.globl aes_nohw_set_decrypt_key
@@ -1390,8 +1396,9 @@ aes_nohw_cbc_encrypt:
cmpq $0,%rdx
je .Lcbc_epilogue
pushfq
+
+
.cfi_adjust_cfa_offset 8
-.cfi_offset 49,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
@@ -1420,6 +1427,7 @@ aes_nohw_cbc_encrypt:
cmpq $0,%r9
cmoveq %r10,%r14
+.cfi_remember_state
leaq OPENSSL_ia32cap_P(%rip),%r10
movl (%r10),%r10d
cmpq $512,%rdx
@@ -1656,6 +1664,7 @@ aes_nohw_cbc_encrypt:
.align 16
.Lcbc_slow_prologue:
+.cfi_restore_state
leaq -88(%rsp),%rbp
andq $-64,%rbp
@@ -1667,8 +1676,10 @@ aes_nohw_cbc_encrypt:
subq %r10,%rbp
xchgq %rsp,%rbp
+.cfi_def_cfa_register %rbp
movq %rbp,16(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x40
.Lcbc_slow_body:
@@ -1857,8 +1868,9 @@ aes_nohw_cbc_encrypt:
.cfi_def_cfa %rsp,16
.Lcbc_popfq:
popfq
+
+
.cfi_adjust_cfa_offset -8
-.cfi_restore 49
.Lcbc_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
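
aes_nohw_cbc_encrypt swaps %rsp into %rbp with xchgq and spills the old stack pointer to the stack itself, which the simple directives cannot describe, so the patch falls back to a raw DWARF expression (and drops the .cfi_offset for DWARF register 49, %rflags, after pushfq rather than describing it). Decoding the escape bytes in the hunk above — my reading of the opcodes, not commentary from the patch:

.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x40
# 0x0f		DW_CFA_def_cfa_expression
# 0x05		the expression is 5 bytes long
# 0x77 0x10	DW_OP_breg7: %rsp + 16
# 0x06		DW_OP_deref
# 0x23 0x40	DW_OP_plus_uconst 64
# i.e. CFA = *(%rsp + 16) + 64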
diff --git a/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S b/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
index 066f4774..240cb5d4 100644
--- a/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
+++ b/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
@@ -556,6 +556,13 @@ _aesni_ctr32_6x:
.align 32
aesni_gcm_encrypt:
.cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+.extern BORINGSSL_function_hit
+.hidden BORINGSSL_function_hit
+ movb $1,BORINGSSL_function_hit+2(%rip)
+#endif
+#endif
xorq %r10,%r10
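
The other recurring addition is BoringSSL's dispatch instrumentation: in debug, non-FIPS builds, each accelerated entry point sets one byte in a shared BORINGSSL_function_hit array so tests can verify which assembly path actually ran. The pattern, as it recurs through this sync (the constant offset selects the per-function slot; the index-to-function mapping lives on the C side and is not part of this patch):

#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
.extern	BORINGSSL_function_hit
.hidden	BORINGSSL_function_hit
	movb	$1,BORINGSSL_function_hit+2(%rip)	# mark this function's slot
#endif
#endif

Release and FIPS builds preprocess the store away entirely, so the hot paths are unchanged there.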
diff --git a/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S b/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
index 9ea98246..42e55307 100644
--- a/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
+++ b/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
@@ -19,6 +19,14 @@
.type aes_hw_encrypt,@function
.align 16
aes_hw_encrypt:
+.cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+.extern BORINGSSL_function_hit
+.hidden BORINGSSL_function_hit
+ movb $1,BORINGSSL_function_hit+1(%rip)
+#endif
+#endif
movups (%rdi),%xmm2
movl 240(%rdx),%eax
movups (%rdx),%xmm0
@@ -37,6 +45,7 @@ aes_hw_encrypt:
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
.byte 0xf3,0xc3
+.cfi_endproc
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_decrypt
@@ -44,6 +53,7 @@ aes_hw_encrypt:
.type aes_hw_decrypt,@function
.align 16
aes_hw_decrypt:
+.cfi_startproc
movups (%rdi),%xmm2
movl 240(%rdx),%eax
movups (%rdx),%xmm0
@@ -62,10 +72,12 @@ aes_hw_decrypt:
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
.byte 0xf3,0xc3
+.cfi_endproc
.size aes_hw_decrypt, .-aes_hw_decrypt
.type _aesni_encrypt2,@function
.align 16
_aesni_encrypt2:
+.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax
movups 16(%rcx),%xmm1
@@ -91,10 +103,12 @@ _aesni_encrypt2:
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 0xf3,0xc3
+.cfi_endproc
.size _aesni_encrypt2,.-_aesni_encrypt2
.type _aesni_decrypt2,@function
.align 16
_aesni_decrypt2:
+.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax
movups 16(%rcx),%xmm1
@@ -120,10 +134,12 @@ _aesni_decrypt2:
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 0xf3,0xc3
+.cfi_endproc
.size _aesni_decrypt2,.-_aesni_decrypt2
.type _aesni_encrypt3,@function
.align 16
_aesni_encrypt3:
+.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax
movups 16(%rcx),%xmm1
@@ -154,10 +170,12 @@ _aesni_encrypt3:
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 0xf3,0xc3
+.cfi_endproc
.size _aesni_encrypt3,.-_aesni_encrypt3
.type _aesni_decrypt3,@function
.align 16
_aesni_decrypt3:
+.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax
movups 16(%rcx),%xmm1
@@ -188,10 +206,12 @@ _aesni_decrypt3:
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 0xf3,0xc3
+.cfi_endproc
.size _aesni_decrypt3,.-_aesni_decrypt3
.type _aesni_encrypt4,@function
.align 16
_aesni_encrypt4:
+.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax
movups 16(%rcx),%xmm1
@@ -228,10 +248,12 @@ _aesni_encrypt4:
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 0xf3,0xc3
+.cfi_endproc
.size _aesni_encrypt4,.-_aesni_encrypt4
.type _aesni_decrypt4,@function
.align 16
_aesni_decrypt4:
+.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax
movups 16(%rcx),%xmm1
@@ -268,10 +290,12 @@ _aesni_decrypt4:
.byte 102,15,56,223,224
.byte 102,15,56,223,232
.byte 0xf3,0xc3
+.cfi_endproc
.size _aesni_decrypt4,.-_aesni_decrypt4
.type _aesni_encrypt6,@function
.align 16
_aesni_encrypt6:
+.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax
movups 16(%rcx),%xmm1
@@ -322,10 +346,12 @@ _aesni_encrypt6:
.byte 102,15,56,221,240
.byte 102,15,56,221,248
.byte 0xf3,0xc3
+.cfi_endproc
.size _aesni_encrypt6,.-_aesni_encrypt6
.type _aesni_decrypt6,@function
.align 16
_aesni_decrypt6:
+.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax
movups 16(%rcx),%xmm1
@@ -376,10 +402,12 @@ _aesni_decrypt6:
.byte 102,15,56,223,240
.byte 102,15,56,223,248
.byte 0xf3,0xc3
+.cfi_endproc
.size _aesni_decrypt6,.-_aesni_decrypt6
.type _aesni_encrypt8,@function
.align 16
_aesni_encrypt8:
+.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax
movups 16(%rcx),%xmm1
@@ -440,10 +468,12 @@ _aesni_encrypt8:
.byte 102,68,15,56,221,192
.byte 102,68,15,56,221,200
.byte 0xf3,0xc3
+.cfi_endproc
.size _aesni_encrypt8,.-_aesni_encrypt8
.type _aesni_decrypt8,@function
.align 16
_aesni_decrypt8:
+.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax
movups 16(%rcx),%xmm1
@@ -504,12 +534,14 @@ _aesni_decrypt8:
.byte 102,68,15,56,223,192
.byte 102,68,15,56,223,200
.byte 0xf3,0xc3
+.cfi_endproc
.size _aesni_decrypt8,.-_aesni_decrypt8
.globl aes_hw_ecb_encrypt
.hidden aes_hw_ecb_encrypt
.type aes_hw_ecb_encrypt,@function
.align 16
aes_hw_ecb_encrypt:
+.cfi_startproc
andq $-16,%rdx
jz .Lecb_ret
@@ -847,175 +879,19 @@ aes_hw_ecb_encrypt:
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
.byte 0xf3,0xc3
+.cfi_endproc
.size aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt
-.globl aes_hw_ccm64_encrypt_blocks
-.hidden aes_hw_ccm64_encrypt_blocks
-.type aes_hw_ccm64_encrypt_blocks,@function
-.align 16
-aes_hw_ccm64_encrypt_blocks:
- movl 240(%rcx),%eax
- movdqu (%r8),%xmm6
- movdqa .Lincrement64(%rip),%xmm9
- movdqa .Lbswap_mask(%rip),%xmm7
-
- shll $4,%eax
- movl $16,%r10d
- leaq 0(%rcx),%r11
- movdqu (%r9),%xmm3
- movdqa %xmm6,%xmm2
- leaq 32(%rcx,%rax,1),%rcx
-.byte 102,15,56,0,247
- subq %rax,%r10
- jmp .Lccm64_enc_outer
-.align 16
-.Lccm64_enc_outer:
- movups (%r11),%xmm0
- movq %r10,%rax
- movups (%rdi),%xmm8
-
- xorps %xmm0,%xmm2
- movups 16(%r11),%xmm1
- xorps %xmm8,%xmm0
- xorps %xmm0,%xmm3
- movups 32(%r11),%xmm0
-
-.Lccm64_enc2_loop:
-.byte 102,15,56,220,209
-.byte 102,15,56,220,217
- movups (%rcx,%rax,1),%xmm1
- addq $32,%rax
-.byte 102,15,56,220,208
-.byte 102,15,56,220,216
- movups -16(%rcx,%rax,1),%xmm0
- jnz .Lccm64_enc2_loop
-.byte 102,15,56,220,209
-.byte 102,15,56,220,217
- paddq %xmm9,%xmm6
- decq %rdx
-.byte 102,15,56,221,208
-.byte 102,15,56,221,216
-
- leaq 16(%rdi),%rdi
- xorps %xmm2,%xmm8
- movdqa %xmm6,%xmm2
- movups %xmm8,(%rsi)
-.byte 102,15,56,0,215
- leaq 16(%rsi),%rsi
- jnz .Lccm64_enc_outer
-
- pxor %xmm0,%xmm0
- pxor %xmm1,%xmm1
- pxor %xmm2,%xmm2
- movups %xmm3,(%r9)
- pxor %xmm3,%xmm3
- pxor %xmm8,%xmm8
- pxor %xmm6,%xmm6
- .byte 0xf3,0xc3
-.size aes_hw_ccm64_encrypt_blocks,.-aes_hw_ccm64_encrypt_blocks
-.globl aes_hw_ccm64_decrypt_blocks
-.hidden aes_hw_ccm64_decrypt_blocks
-.type aes_hw_ccm64_decrypt_blocks,@function
-.align 16
-aes_hw_ccm64_decrypt_blocks:
- movl 240(%rcx),%eax
- movups (%r8),%xmm6
- movdqu (%r9),%xmm3
- movdqa .Lincrement64(%rip),%xmm9
- movdqa .Lbswap_mask(%rip),%xmm7
-
- movaps %xmm6,%xmm2
- movl %eax,%r10d
- movq %rcx,%r11
-.byte 102,15,56,0,247
- movups (%rcx),%xmm0
- movups 16(%rcx),%xmm1
- leaq 32(%rcx),%rcx
- xorps %xmm0,%xmm2
-.Loop_enc1_5:
-.byte 102,15,56,220,209
- decl %eax
- movups (%rcx),%xmm1
- leaq 16(%rcx),%rcx
- jnz .Loop_enc1_5
-.byte 102,15,56,221,209
- shll $4,%r10d
- movl $16,%eax
- movups (%rdi),%xmm8
- paddq %xmm9,%xmm6
- leaq 16(%rdi),%rdi
- subq %r10,%rax
- leaq 32(%r11,%r10,1),%rcx
- movq %rax,%r10
- jmp .Lccm64_dec_outer
-.align 16
-.Lccm64_dec_outer:
- xorps %xmm2,%xmm8
- movdqa %xmm6,%xmm2
- movups %xmm8,(%rsi)
- leaq 16(%rsi),%rsi
-.byte 102,15,56,0,215
-
- subq $1,%rdx
- jz .Lccm64_dec_break
-
- movups (%r11),%xmm0
- movq %r10,%rax
- movups 16(%r11),%xmm1
- xorps %xmm0,%xmm8
- xorps %xmm0,%xmm2
- xorps %xmm8,%xmm3
- movups 32(%r11),%xmm0
- jmp .Lccm64_dec2_loop
-.align 16
-.Lccm64_dec2_loop:
-.byte 102,15,56,220,209
-.byte 102,15,56,220,217
- movups (%rcx,%rax,1),%xmm1
- addq $32,%rax
-.byte 102,15,56,220,208
-.byte 102,15,56,220,216
- movups -16(%rcx,%rax,1),%xmm0
- jnz .Lccm64_dec2_loop
- movups (%rdi),%xmm8
- paddq %xmm9,%xmm6
-.byte 102,15,56,220,209
-.byte 102,15,56,220,217
-.byte 102,15,56,221,208
-.byte 102,15,56,221,216
- leaq 16(%rdi),%rdi
- jmp .Lccm64_dec_outer
-
-.align 16
-.Lccm64_dec_break:
-
- movl 240(%r11),%eax
- movups (%r11),%xmm0
- movups 16(%r11),%xmm1
- xorps %xmm0,%xmm8
- leaq 32(%r11),%r11
- xorps %xmm8,%xmm3
-.Loop_enc1_6:
-.byte 102,15,56,220,217
- decl %eax
- movups (%r11),%xmm1
- leaq 16(%r11),%r11
- jnz .Loop_enc1_6
-.byte 102,15,56,221,217
- pxor %xmm0,%xmm0
- pxor %xmm1,%xmm1
- pxor %xmm2,%xmm2
- movups %xmm3,(%r9)
- pxor %xmm3,%xmm3
- pxor %xmm8,%xmm8
- pxor %xmm6,%xmm6
- .byte 0xf3,0xc3
-.size aes_hw_ccm64_decrypt_blocks,.-aes_hw_ccm64_decrypt_blocks
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,@function
.align 16
aes_hw_ctr32_encrypt_blocks:
.cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+ movb $1,BORINGSSL_function_hit(%rip)
+#endif
+#endif
cmpq $1,%rdx
jne .Lctr32_bulk
@@ -1028,12 +904,12 @@ aes_hw_ctr32_encrypt_blocks:
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
-.Loop_enc1_7:
+.Loop_enc1_5:
.byte 102,15,56,220,209
decl %edx
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
- jnz .Loop_enc1_7
+ jnz .Loop_enc1_5
.byte 102,15,56,221,209
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
@@ -1590,1839 +1466,6 @@ aes_hw_ctr32_encrypt_blocks:
.byte 0xf3,0xc3
.cfi_endproc
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
-.globl aes_hw_xts_encrypt
-.hidden aes_hw_xts_encrypt
-.type aes_hw_xts_encrypt,@function
-.align 16
-aes_hw_xts_encrypt:
-.cfi_startproc
- leaq (%rsp),%r11
-.cfi_def_cfa_register %r11
- pushq %rbp
-.cfi_offset %rbp,-16
- subq $112,%rsp
- andq $-16,%rsp
- movups (%r9),%xmm2
- movl 240(%r8),%eax
- movl 240(%rcx),%r10d
- movups (%r8),%xmm0
- movups 16(%r8),%xmm1
- leaq 32(%r8),%r8
- xorps %xmm0,%xmm2
-.Loop_enc1_8:
-.byte 102,15,56,220,209
- decl %eax
- movups (%r8),%xmm1
- leaq 16(%r8),%r8
- jnz .Loop_enc1_8
-.byte 102,15,56,221,209
- movups (%rcx),%xmm0
- movq %rcx,%rbp
- movl %r10d,%eax
- shll $4,%r10d
- movq %rdx,%r9
- andq $-16,%rdx
-
- movups 16(%rcx,%r10,1),%xmm1
-
- movdqa .Lxts_magic(%rip),%xmm8
- movdqa %xmm2,%xmm15
- pshufd $0x5f,%xmm2,%xmm9
- pxor %xmm0,%xmm1
- movdqa %xmm9,%xmm14
- paddd %xmm9,%xmm9
- movdqa %xmm15,%xmm10
- psrad $31,%xmm14
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
- pxor %xmm0,%xmm10
- pxor %xmm14,%xmm15
- movdqa %xmm9,%xmm14
- paddd %xmm9,%xmm9
- movdqa %xmm15,%xmm11
- psrad $31,%xmm14
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
- pxor %xmm0,%xmm11
- pxor %xmm14,%xmm15
- movdqa %xmm9,%xmm14
- paddd %xmm9,%xmm9
- movdqa %xmm15,%xmm12
- psrad $31,%xmm14
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
- pxor %xmm0,%xmm12
- pxor %xmm14,%xmm15
- movdqa %xmm9,%xmm14
- paddd %xmm9,%xmm9
- movdqa %xmm15,%xmm13
- psrad $31,%xmm14
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
- pxor %xmm0,%xmm13
- pxor %xmm14,%xmm15
- movdqa %xmm15,%xmm14
- psrad $31,%xmm9
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm9
- pxor %xmm0,%xmm14
- pxor %xmm9,%xmm15
- movaps %xmm1,96(%rsp)
-
- subq $96,%rdx
- jc .Lxts_enc_short
-
- movl $16+96,%eax
- leaq 32(%rbp,%r10,1),%rcx
- subq %r10,%rax
- movups 16(%rbp),%xmm1
- movq %rax,%r10
- leaq .Lxts_magic(%rip),%r8
- jmp .Lxts_enc_grandloop
-
-.align 32
-.Lxts_enc_grandloop:
- movdqu 0(%rdi),%xmm2
- movdqa %xmm0,%xmm8
- movdqu 16(%rdi),%xmm3
- pxor %xmm10,%xmm2
- movdqu 32(%rdi),%xmm4
- pxor %xmm11,%xmm3
-.byte 102,15,56,220,209
- movdqu 48(%rdi),%xmm5
- pxor %xmm12,%xmm4
-.byte 102,15,56,220,217
- movdqu 64(%rdi),%xmm6
- pxor %xmm13,%xmm5
-.byte 102,15,56,220,225
- movdqu 80(%rdi),%xmm7
- pxor %xmm15,%xmm8
- movdqa 96(%rsp),%xmm9
- pxor %xmm14,%xmm6
-.byte 102,15,56,220,233
- movups 32(%rbp),%xmm0
- leaq 96(%rdi),%rdi
- pxor %xmm8,%xmm7
-
- pxor %xmm9,%xmm10
-.byte 102,15,56,220,241
- pxor %xmm9,%xmm11
- movdqa %xmm10,0(%rsp)
-.byte 102,15,56,220,249
- movups 48(%rbp),%xmm1
- pxor %xmm9,%xmm12
-
-.byte 102,15,56,220,208
- pxor %xmm9,%xmm13
- movdqa %xmm11,16(%rsp)
-.byte 102,15,56,220,216
- pxor %xmm9,%xmm14
- movdqa %xmm12,32(%rsp)
-.byte 102,15,56,220,224
-.byte 102,15,56,220,232
- pxor %xmm9,%xmm8
- movdqa %xmm14,64(%rsp)
-.byte 102,15,56,220,240
-.byte 102,15,56,220,248
- movups 64(%rbp),%xmm0
- movdqa %xmm8,80(%rsp)
- pshufd $0x5f,%xmm15,%xmm9
- jmp .Lxts_enc_loop6
-.align 32
-.Lxts_enc_loop6:
-.byte 102,15,56,220,209
-.byte 102,15,56,220,217
-.byte 102,15,56,220,225
-.byte 102,15,56,220,233
-.byte 102,15,56,220,241
-.byte 102,15,56,220,249
- movups -64(%rcx,%rax,1),%xmm1
- addq $32,%rax
-
-.byte 102,15,56,220,208
-.byte 102,15,56,220,216
-.byte 102,15,56,220,224
-.byte 102,15,56,220,232
-.byte 102,15,56,220,240
-.byte 102,15,56,220,248
- movups -80(%rcx,%rax,1),%xmm0
- jnz .Lxts_enc_loop6
-
- movdqa (%r8),%xmm8
- movdqa %xmm9,%xmm14
- paddd %xmm9,%xmm9
-.byte 102,15,56,220,209
- paddq %xmm15,%xmm15
- psrad $31,%xmm14
-.byte 102,15,56,220,217
- pand %xmm8,%xmm14
- movups (%rbp),%xmm10
-.byte 102,15,56,220,225
-.byte 102,15,56,220,233
-.byte 102,15,56,220,241
- pxor %xmm14,%xmm15
- movaps %xmm10,%xmm11
-.byte 102,15,56,220,249
- movups -64(%rcx),%xmm1
-
- movdqa %xmm9,%xmm14
-.byte 102,15,56,220,208
- paddd %xmm9,%xmm9
- pxor %xmm15,%xmm10
-.byte 102,15,56,220,216
- psrad $31,%xmm14
- paddq %xmm15,%xmm15
-.byte 102,15,56,220,224
-.byte 102,15,56,220,232
- pand %xmm8,%xmm14
- movaps %xmm11,%xmm12
-.byte 102,15,56,220,240
- pxor %xmm14,%xmm15
- movdqa %xmm9,%xmm14
-.byte 102,15,56,220,248
- movups -48(%rcx),%xmm0
-
- paddd %xmm9,%xmm9
-.byte 102,15,56,220,209
- pxor %xmm15,%xmm11
- psrad $31,%xmm14
-.byte 102,15,56,220,217
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
-.byte 102,15,56,220,225
-.byte 102,15,56,220,233
- movdqa %xmm13,48(%rsp)
- pxor %xmm14,%xmm15
-.byte 102,15,56,220,241
- movaps %xmm12,%xmm13
- movdqa %xmm9,%xmm14
-.byte 102,15,56,220,249
- movups -32(%rcx),%xmm1
-
- paddd %xmm9,%xmm9
-.byte 102,15,56,220,208
- pxor %xmm15,%xmm12
- psrad $31,%xmm14
-.byte 102,15,56,220,216
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
-.byte 102,15,56,220,224
-.byte 102,15,56,220,232
-.byte 102,15,56,220,240
- pxor %xmm14,%xmm15
- movaps %xmm13,%xmm14
-.byte 102,15,56,220,248
-
- movdqa %xmm9,%xmm0
- paddd %xmm9,%xmm9
-.byte 102,15,56,220,209
- pxor %xmm15,%xmm13
- psrad $31,%xmm0
-.byte 102,15,56,220,217
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm0
-.byte 102,15,56,220,225
-.byte 102,15,56,220,233
- pxor %xmm0,%xmm15
- movups (%rbp),%xmm0
-.byte 102,15,56,220,241
-.byte 102,15,56,220,249
- movups 16(%rbp),%xmm1
-
- pxor %xmm15,%xmm14
-.byte 102,15,56,221,84,36,0
- psrad $31,%xmm9
- paddq %xmm15,%xmm15
-.byte 102,15,56,221,92,36,16
-.byte 102,15,56,221,100,36,32
- pand %xmm8,%xmm9
- movq %r10,%rax
-.byte 102,15,56,221,108,36,48
-.byte 102,15,56,221,116,36,64
-.byte 102,15,56,221,124,36,80
- pxor %xmm9,%xmm15
-
- leaq 96(%rsi),%rsi
- movups %xmm2,-96(%rsi)
- movups %xmm3,-80(%rsi)
- movups %xmm4,-64(%rsi)
- movups %xmm5,-48(%rsi)
- movups %xmm6,-32(%rsi)
- movups %xmm7,-16(%rsi)
- subq $96,%rdx
- jnc .Lxts_enc_grandloop
-
- movl $16+96,%eax
- subl %r10d,%eax
- movq %rbp,%rcx
- shrl $4,%eax
-
-.Lxts_enc_short:
-
- movl %eax,%r10d
- pxor %xmm0,%xmm10
- addq $96,%rdx
- jz .Lxts_enc_done
-
- pxor %xmm0,%xmm11
- cmpq $0x20,%rdx
- jb .Lxts_enc_one
- pxor %xmm0,%xmm12
- je .Lxts_enc_two
-
- pxor %xmm0,%xmm13
- cmpq $0x40,%rdx
- jb .Lxts_enc_three
- pxor %xmm0,%xmm14
- je .Lxts_enc_four
-
- movdqu (%rdi),%xmm2
- movdqu 16(%rdi),%xmm3
- movdqu 32(%rdi),%xmm4
- pxor %xmm10,%xmm2
- movdqu 48(%rdi),%xmm5
- pxor %xmm11,%xmm3
- movdqu 64(%rdi),%xmm6
- leaq 80(%rdi),%rdi
- pxor %xmm12,%xmm4
- pxor %xmm13,%xmm5
- pxor %xmm14,%xmm6
- pxor %xmm7,%xmm7
-
- call _aesni_encrypt6
-
- xorps %xmm10,%xmm2
- movdqa %xmm15,%xmm10
- xorps %xmm11,%xmm3
- xorps %xmm12,%xmm4
- movdqu %xmm2,(%rsi)
- xorps %xmm13,%xmm5
- movdqu %xmm3,16(%rsi)
- xorps %xmm14,%xmm6
- movdqu %xmm4,32(%rsi)
- movdqu %xmm5,48(%rsi)
- movdqu %xmm6,64(%rsi)
- leaq 80(%rsi),%rsi
- jmp .Lxts_enc_done
-
-.align 16
-.Lxts_enc_one:
- movups (%rdi),%xmm2
- leaq 16(%rdi),%rdi
- xorps %xmm10,%xmm2
- movups (%rcx),%xmm0
- movups 16(%rcx),%xmm1
- leaq 32(%rcx),%rcx
- xorps %xmm0,%xmm2
-.Loop_enc1_9:
-.byte 102,15,56,220,209
- decl %eax
- movups (%rcx),%xmm1
- leaq 16(%rcx),%rcx
- jnz .Loop_enc1_9
-.byte 102,15,56,221,209
- xorps %xmm10,%xmm2
- movdqa %xmm11,%xmm10
- movups %xmm2,(%rsi)
- leaq 16(%rsi),%rsi
- jmp .Lxts_enc_done
-
-.align 16
-.Lxts_enc_two:
- movups (%rdi),%xmm2
- movups 16(%rdi),%xmm3
- leaq 32(%rdi),%rdi
- xorps %xmm10,%xmm2
- xorps %xmm11,%xmm3
-
- call _aesni_encrypt2
-
- xorps %xmm10,%xmm2
- movdqa %xmm12,%xmm10
- xorps %xmm11,%xmm3
- movups %xmm2,(%rsi)
- movups %xmm3,16(%rsi)
- leaq 32(%rsi),%rsi
- jmp .Lxts_enc_done
-
-.align 16
-.Lxts_enc_three:
- movups (%rdi),%xmm2
- movups 16(%rdi),%xmm3
- movups 32(%rdi),%xmm4
- leaq 48(%rdi),%rdi
- xorps %xmm10,%xmm2
- xorps %xmm11,%xmm3
- xorps %xmm12,%xmm4
-
- call _aesni_encrypt3
-
- xorps %xmm10,%xmm2
- movdqa %xmm13,%xmm10
- xorps %xmm11,%xmm3
- xorps %xmm12,%xmm4
- movups %xmm2,(%rsi)
- movups %xmm3,16(%rsi)
- movups %xmm4,32(%rsi)
- leaq 48(%rsi),%rsi
- jmp .Lxts_enc_done
-
-.align 16
-.Lxts_enc_four:
- movups (%rdi),%xmm2
- movups 16(%rdi),%xmm3
- movups 32(%rdi),%xmm4
- xorps %xmm10,%xmm2
- movups 48(%rdi),%xmm5
- leaq 64(%rdi),%rdi
- xorps %xmm11,%xmm3
- xorps %xmm12,%xmm4
- xorps %xmm13,%xmm5
-
- call _aesni_encrypt4
-
- pxor %xmm10,%xmm2
- movdqa %xmm14,%xmm10
- pxor %xmm11,%xmm3
- pxor %xmm12,%xmm4
- movdqu %xmm2,(%rsi)
- pxor %xmm13,%xmm5
- movdqu %xmm3,16(%rsi)
- movdqu %xmm4,32(%rsi)
- movdqu %xmm5,48(%rsi)
- leaq 64(%rsi),%rsi
- jmp .Lxts_enc_done
-
-.align 16
-.Lxts_enc_done:
- andq $15,%r9
- jz .Lxts_enc_ret
- movq %r9,%rdx
-
-.Lxts_enc_steal:
- movzbl (%rdi),%eax
- movzbl -16(%rsi),%ecx
- leaq 1(%rdi),%rdi
- movb %al,-16(%rsi)
- movb %cl,0(%rsi)
- leaq 1(%rsi),%rsi
- subq $1,%rdx
- jnz .Lxts_enc_steal
-
- subq %r9,%rsi
- movq %rbp,%rcx
- movl %r10d,%eax
-
- movups -16(%rsi),%xmm2
- xorps %xmm10,%xmm2
- movups (%rcx),%xmm0
- movups 16(%rcx),%xmm1
- leaq 32(%rcx),%rcx
- xorps %xmm0,%xmm2
-.Loop_enc1_10:
-.byte 102,15,56,220,209
- decl %eax
- movups (%rcx),%xmm1
- leaq 16(%rcx),%rcx
- jnz .Loop_enc1_10
-.byte 102,15,56,221,209
- xorps %xmm10,%xmm2
- movups %xmm2,-16(%rsi)
-
-.Lxts_enc_ret:
- xorps %xmm0,%xmm0
- pxor %xmm1,%xmm1
- pxor %xmm2,%xmm2
- pxor %xmm3,%xmm3
- pxor %xmm4,%xmm4
- pxor %xmm5,%xmm5
- pxor %xmm6,%xmm6
- pxor %xmm7,%xmm7
- movaps %xmm0,0(%rsp)
- pxor %xmm8,%xmm8
- movaps %xmm0,16(%rsp)
- pxor %xmm9,%xmm9
- movaps %xmm0,32(%rsp)
- pxor %xmm10,%xmm10
- movaps %xmm0,48(%rsp)
- pxor %xmm11,%xmm11
- movaps %xmm0,64(%rsp)
- pxor %xmm12,%xmm12
- movaps %xmm0,80(%rsp)
- pxor %xmm13,%xmm13
- movaps %xmm0,96(%rsp)
- pxor %xmm14,%xmm14
- pxor %xmm15,%xmm15
- movq -8(%r11),%rbp
-.cfi_restore %rbp
- leaq (%r11),%rsp
-.cfi_def_cfa_register %rsp
-.Lxts_enc_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size aes_hw_xts_encrypt,.-aes_hw_xts_encrypt
-.globl aes_hw_xts_decrypt
-.hidden aes_hw_xts_decrypt
-.type aes_hw_xts_decrypt,@function
-.align 16
-aes_hw_xts_decrypt:
-.cfi_startproc
- leaq (%rsp),%r11
-.cfi_def_cfa_register %r11
- pushq %rbp
-.cfi_offset %rbp,-16
- subq $112,%rsp
- andq $-16,%rsp
- movups (%r9),%xmm2
- movl 240(%r8),%eax
- movl 240(%rcx),%r10d
- movups (%r8),%xmm0
- movups 16(%r8),%xmm1
- leaq 32(%r8),%r8
- xorps %xmm0,%xmm2
-.Loop_enc1_11:
-.byte 102,15,56,220,209
- decl %eax
- movups (%r8),%xmm1
- leaq 16(%r8),%r8
- jnz .Loop_enc1_11
-.byte 102,15,56,221,209
- xorl %eax,%eax
- testq $15,%rdx
- setnz %al
- shlq $4,%rax
- subq %rax,%rdx
-
- movups (%rcx),%xmm0
- movq %rcx,%rbp
- movl %r10d,%eax
- shll $4,%r10d
- movq %rdx,%r9
- andq $-16,%rdx
-
- movups 16(%rcx,%r10,1),%xmm1
-
- movdqa .Lxts_magic(%rip),%xmm8
- movdqa %xmm2,%xmm15
- pshufd $0x5f,%xmm2,%xmm9
- pxor %xmm0,%xmm1
- movdqa %xmm9,%xmm14
- paddd %xmm9,%xmm9
- movdqa %xmm15,%xmm10
- psrad $31,%xmm14
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
- pxor %xmm0,%xmm10
- pxor %xmm14,%xmm15
- movdqa %xmm9,%xmm14
- paddd %xmm9,%xmm9
- movdqa %xmm15,%xmm11
- psrad $31,%xmm14
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
- pxor %xmm0,%xmm11
- pxor %xmm14,%xmm15
- movdqa %xmm9,%xmm14
- paddd %xmm9,%xmm9
- movdqa %xmm15,%xmm12
- psrad $31,%xmm14
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
- pxor %xmm0,%xmm12
- pxor %xmm14,%xmm15
- movdqa %xmm9,%xmm14
- paddd %xmm9,%xmm9
- movdqa %xmm15,%xmm13
- psrad $31,%xmm14
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
- pxor %xmm0,%xmm13
- pxor %xmm14,%xmm15
- movdqa %xmm15,%xmm14
- psrad $31,%xmm9
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm9
- pxor %xmm0,%xmm14
- pxor %xmm9,%xmm15
- movaps %xmm1,96(%rsp)
-
- subq $96,%rdx
- jc .Lxts_dec_short
-
- movl $16+96,%eax
- leaq 32(%rbp,%r10,1),%rcx
- subq %r10,%rax
- movups 16(%rbp),%xmm1
- movq %rax,%r10
- leaq .Lxts_magic(%rip),%r8
- jmp .Lxts_dec_grandloop
-
-.align 32
-.Lxts_dec_grandloop:
- movdqu 0(%rdi),%xmm2
- movdqa %xmm0,%xmm8
- movdqu 16(%rdi),%xmm3
- pxor %xmm10,%xmm2
- movdqu 32(%rdi),%xmm4
- pxor %xmm11,%xmm3
-.byte 102,15,56,222,209
- movdqu 48(%rdi),%xmm5
- pxor %xmm12,%xmm4
-.byte 102,15,56,222,217
- movdqu 64(%rdi),%xmm6
- pxor %xmm13,%xmm5
-.byte 102,15,56,222,225
- movdqu 80(%rdi),%xmm7
- pxor %xmm15,%xmm8
- movdqa 96(%rsp),%xmm9
- pxor %xmm14,%xmm6
-.byte 102,15,56,222,233
- movups 32(%rbp),%xmm0
- leaq 96(%rdi),%rdi
- pxor %xmm8,%xmm7
-
- pxor %xmm9,%xmm10
-.byte 102,15,56,222,241
- pxor %xmm9,%xmm11
- movdqa %xmm10,0(%rsp)
-.byte 102,15,56,222,249
- movups 48(%rbp),%xmm1
- pxor %xmm9,%xmm12
-
-.byte 102,15,56,222,208
- pxor %xmm9,%xmm13
- movdqa %xmm11,16(%rsp)
-.byte 102,15,56,222,216
- pxor %xmm9,%xmm14
- movdqa %xmm12,32(%rsp)
-.byte 102,15,56,222,224
-.byte 102,15,56,222,232
- pxor %xmm9,%xmm8
- movdqa %xmm14,64(%rsp)
-.byte 102,15,56,222,240
-.byte 102,15,56,222,248
- movups 64(%rbp),%xmm0
- movdqa %xmm8,80(%rsp)
- pshufd $0x5f,%xmm15,%xmm9
- jmp .Lxts_dec_loop6
-.align 32
-.Lxts_dec_loop6:
-.byte 102,15,56,222,209
-.byte 102,15,56,222,217
-.byte 102,15,56,222,225
-.byte 102,15,56,222,233
-.byte 102,15,56,222,241
-.byte 102,15,56,222,249
- movups -64(%rcx,%rax,1),%xmm1
- addq $32,%rax
-
-.byte 102,15,56,222,208
-.byte 102,15,56,222,216
-.byte 102,15,56,222,224
-.byte 102,15,56,222,232
-.byte 102,15,56,222,240
-.byte 102,15,56,222,248
- movups -80(%rcx,%rax,1),%xmm0
- jnz .Lxts_dec_loop6
-
- movdqa (%r8),%xmm8
- movdqa %xmm9,%xmm14
- paddd %xmm9,%xmm9
-.byte 102,15,56,222,209
- paddq %xmm15,%xmm15
- psrad $31,%xmm14
-.byte 102,15,56,222,217
- pand %xmm8,%xmm14
- movups (%rbp),%xmm10
-.byte 102,15,56,222,225
-.byte 102,15,56,222,233
-.byte 102,15,56,222,241
- pxor %xmm14,%xmm15
- movaps %xmm10,%xmm11
-.byte 102,15,56,222,249
- movups -64(%rcx),%xmm1
-
- movdqa %xmm9,%xmm14
-.byte 102,15,56,222,208
- paddd %xmm9,%xmm9
- pxor %xmm15,%xmm10
-.byte 102,15,56,222,216
- psrad $31,%xmm14
- paddq %xmm15,%xmm15
-.byte 102,15,56,222,224
-.byte 102,15,56,222,232
- pand %xmm8,%xmm14
- movaps %xmm11,%xmm12
-.byte 102,15,56,222,240
- pxor %xmm14,%xmm15
- movdqa %xmm9,%xmm14
-.byte 102,15,56,222,248
- movups -48(%rcx),%xmm0
-
- paddd %xmm9,%xmm9
-.byte 102,15,56,222,209
- pxor %xmm15,%xmm11
- psrad $31,%xmm14
-.byte 102,15,56,222,217
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
-.byte 102,15,56,222,225
-.byte 102,15,56,222,233
- movdqa %xmm13,48(%rsp)
- pxor %xmm14,%xmm15
-.byte 102,15,56,222,241
- movaps %xmm12,%xmm13
- movdqa %xmm9,%xmm14
-.byte 102,15,56,222,249
- movups -32(%rcx),%xmm1
-
- paddd %xmm9,%xmm9
-.byte 102,15,56,222,208
- pxor %xmm15,%xmm12
- psrad $31,%xmm14
-.byte 102,15,56,222,216
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm14
-.byte 102,15,56,222,224
-.byte 102,15,56,222,232
-.byte 102,15,56,222,240
- pxor %xmm14,%xmm15
- movaps %xmm13,%xmm14
-.byte 102,15,56,222,248
-
- movdqa %xmm9,%xmm0
- paddd %xmm9,%xmm9
-.byte 102,15,56,222,209
- pxor %xmm15,%xmm13
- psrad $31,%xmm0
-.byte 102,15,56,222,217
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm0
-.byte 102,15,56,222,225
-.byte 102,15,56,222,233
- pxor %xmm0,%xmm15
- movups (%rbp),%xmm0
-.byte 102,15,56,222,241
-.byte 102,15,56,222,249
- movups 16(%rbp),%xmm1
-
- pxor %xmm15,%xmm14
-.byte 102,15,56,223,84,36,0
- psrad $31,%xmm9
- paddq %xmm15,%xmm15
-.byte 102,15,56,223,92,36,16
-.byte 102,15,56,223,100,36,32
- pand %xmm8,%xmm9
- movq %r10,%rax
-.byte 102,15,56,223,108,36,48
-.byte 102,15,56,223,116,36,64
-.byte 102,15,56,223,124,36,80
- pxor %xmm9,%xmm15
-
- leaq 96(%rsi),%rsi
- movups %xmm2,-96(%rsi)
- movups %xmm3,-80(%rsi)
- movups %xmm4,-64(%rsi)
- movups %xmm5,-48(%rsi)
- movups %xmm6,-32(%rsi)
- movups %xmm7,-16(%rsi)
- subq $96,%rdx
- jnc .Lxts_dec_grandloop
-
- movl $16+96,%eax
- subl %r10d,%eax
- movq %rbp,%rcx
- shrl $4,%eax
-
-.Lxts_dec_short:
-
- movl %eax,%r10d
- pxor %xmm0,%xmm10
- pxor %xmm0,%xmm11
- addq $96,%rdx
- jz .Lxts_dec_done
-
- pxor %xmm0,%xmm12
- cmpq $0x20,%rdx
- jb .Lxts_dec_one
- pxor %xmm0,%xmm13
- je .Lxts_dec_two
-
- pxor %xmm0,%xmm14
- cmpq $0x40,%rdx
- jb .Lxts_dec_three
- je .Lxts_dec_four
-
- movdqu (%rdi),%xmm2
- movdqu 16(%rdi),%xmm3
- movdqu 32(%rdi),%xmm4
- pxor %xmm10,%xmm2
- movdqu 48(%rdi),%xmm5
- pxor %xmm11,%xmm3
- movdqu 64(%rdi),%xmm6
- leaq 80(%rdi),%rdi
- pxor %xmm12,%xmm4
- pxor %xmm13,%xmm5
- pxor %xmm14,%xmm6
-
- call _aesni_decrypt6
-
- xorps %xmm10,%xmm2
- xorps %xmm11,%xmm3
- xorps %xmm12,%xmm4
- movdqu %xmm2,(%rsi)
- xorps %xmm13,%xmm5
- movdqu %xmm3,16(%rsi)
- xorps %xmm14,%xmm6
- movdqu %xmm4,32(%rsi)
- pxor %xmm14,%xmm14
- movdqu %xmm5,48(%rsi)
- pcmpgtd %xmm15,%xmm14
- movdqu %xmm6,64(%rsi)
- leaq 80(%rsi),%rsi
- pshufd $0x13,%xmm14,%xmm11
- andq $15,%r9
- jz .Lxts_dec_ret
-
- movdqa %xmm15,%xmm10
- paddq %xmm15,%xmm15
- pand %xmm8,%xmm11
- pxor %xmm15,%xmm11
- jmp .Lxts_dec_done2
-
-.align 16
-.Lxts_dec_one:
- movups (%rdi),%xmm2
- leaq 16(%rdi),%rdi
- xorps %xmm10,%xmm2
- movups (%rcx),%xmm0
- movups 16(%rcx),%xmm1
- leaq 32(%rcx),%rcx
- xorps %xmm0,%xmm2
-.Loop_dec1_12:
-.byte 102,15,56,222,209
- decl %eax
- movups (%rcx),%xmm1
- leaq 16(%rcx),%rcx
- jnz .Loop_dec1_12
-.byte 102,15,56,223,209
- xorps %xmm10,%xmm2
- movdqa %xmm11,%xmm10
- movups %xmm2,(%rsi)
- movdqa %xmm12,%xmm11
- leaq 16(%rsi),%rsi
- jmp .Lxts_dec_done
-
-.align 16
-.Lxts_dec_two:
- movups (%rdi),%xmm2
- movups 16(%rdi),%xmm3
- leaq 32(%rdi),%rdi
- xorps %xmm10,%xmm2
- xorps %xmm11,%xmm3
-
- call _aesni_decrypt2
-
- xorps %xmm10,%xmm2
- movdqa %xmm12,%xmm10
- xorps %xmm11,%xmm3
- movdqa %xmm13,%xmm11
- movups %xmm2,(%rsi)
- movups %xmm3,16(%rsi)
- leaq 32(%rsi),%rsi
- jmp .Lxts_dec_done
-
-.align 16
-.Lxts_dec_three:
- movups (%rdi),%xmm2
- movups 16(%rdi),%xmm3
- movups 32(%rdi),%xmm4
- leaq 48(%rdi),%rdi
- xorps %xmm10,%xmm2
- xorps %xmm11,%xmm3
- xorps %xmm12,%xmm4
-
- call _aesni_decrypt3
-
- xorps %xmm10,%xmm2
- movdqa %xmm13,%xmm10
- xorps %xmm11,%xmm3
- movdqa %xmm14,%xmm11
- xorps %xmm12,%xmm4
- movups %xmm2,(%rsi)
- movups %xmm3,16(%rsi)
- movups %xmm4,32(%rsi)
- leaq 48(%rsi),%rsi
- jmp .Lxts_dec_done
-
-.align 16
-.Lxts_dec_four:
- movups (%rdi),%xmm2
- movups 16(%rdi),%xmm3
- movups 32(%rdi),%xmm4
- xorps %xmm10,%xmm2
- movups 48(%rdi),%xmm5
- leaq 64(%rdi),%rdi
- xorps %xmm11,%xmm3
- xorps %xmm12,%xmm4
- xorps %xmm13,%xmm5
-
- call _aesni_decrypt4
-
- pxor %xmm10,%xmm2
- movdqa %xmm14,%xmm10
- pxor %xmm11,%xmm3
- movdqa %xmm15,%xmm11
- pxor %xmm12,%xmm4
- movdqu %xmm2,(%rsi)
- pxor %xmm13,%xmm5
- movdqu %xmm3,16(%rsi)
- movdqu %xmm4,32(%rsi)
- movdqu %xmm5,48(%rsi)
- leaq 64(%rsi),%rsi
- jmp .Lxts_dec_done
-
-.align 16
-.Lxts_dec_done:
- andq $15,%r9
- jz .Lxts_dec_ret
-.Lxts_dec_done2:
- movq %r9,%rdx
- movq %rbp,%rcx
- movl %r10d,%eax
-
- movups (%rdi),%xmm2
- xorps %xmm11,%xmm2
- movups (%rcx),%xmm0
- movups 16(%rcx),%xmm1
- leaq 32(%rcx),%rcx
- xorps %xmm0,%xmm2
-.Loop_dec1_13:
-.byte 102,15,56,222,209
- decl %eax
- movups (%rcx),%xmm1
- leaq 16(%rcx),%rcx
- jnz .Loop_dec1_13
-.byte 102,15,56,223,209
- xorps %xmm11,%xmm2
- movups %xmm2,(%rsi)
-
-.Lxts_dec_steal:
- movzbl 16(%rdi),%eax
- movzbl (%rsi),%ecx
- leaq 1(%rdi),%rdi
- movb %al,(%rsi)
- movb %cl,16(%rsi)
- leaq 1(%rsi),%rsi
- subq $1,%rdx
- jnz .Lxts_dec_steal
-
- subq %r9,%rsi
- movq %rbp,%rcx
- movl %r10d,%eax
-
- movups (%rsi),%xmm2
- xorps %xmm10,%xmm2
- movups (%rcx),%xmm0
- movups 16(%rcx),%xmm1
- leaq 32(%rcx),%rcx
- xorps %xmm0,%xmm2
-.Loop_dec1_14:
-.byte 102,15,56,222,209
- decl %eax
- movups (%rcx),%xmm1
- leaq 16(%rcx),%rcx
- jnz .Loop_dec1_14
-.byte 102,15,56,223,209
- xorps %xmm10,%xmm2
- movups %xmm2,(%rsi)
-
-.Lxts_dec_ret:
- xorps %xmm0,%xmm0
- pxor %xmm1,%xmm1
- pxor %xmm2,%xmm2
- pxor %xmm3,%xmm3
- pxor %xmm4,%xmm4
- pxor %xmm5,%xmm5
- pxor %xmm6,%xmm6
- pxor %xmm7,%xmm7
- movaps %xmm0,0(%rsp)
- pxor %xmm8,%xmm8
- movaps %xmm0,16(%rsp)
- pxor %xmm9,%xmm9
- movaps %xmm0,32(%rsp)
- pxor %xmm10,%xmm10
- movaps %xmm0,48(%rsp)
- pxor %xmm11,%xmm11
- movaps %xmm0,64(%rsp)
- pxor %xmm12,%xmm12
- movaps %xmm0,80(%rsp)
- pxor %xmm13,%xmm13
- movaps %xmm0,96(%rsp)
- pxor %xmm14,%xmm14
- pxor %xmm15,%xmm15
- movq -8(%r11),%rbp
-.cfi_restore %rbp
- leaq (%r11),%rsp
-.cfi_def_cfa_register %rsp
-.Lxts_dec_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size aes_hw_xts_decrypt,.-aes_hw_xts_decrypt
-.globl aes_hw_ocb_encrypt
-.hidden aes_hw_ocb_encrypt
-.type aes_hw_ocb_encrypt,@function
-.align 32
-aes_hw_ocb_encrypt:
-.cfi_startproc
- leaq (%rsp),%rax
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- movq 8(%rax),%rbx
- movq 8+8(%rax),%rbp
-
- movl 240(%rcx),%r10d
- movq %rcx,%r11
- shll $4,%r10d
- movups (%rcx),%xmm9
- movups 16(%rcx,%r10,1),%xmm1
-
- movdqu (%r9),%xmm15
- pxor %xmm1,%xmm9
- pxor %xmm1,%xmm15
-
- movl $16+32,%eax
- leaq 32(%r11,%r10,1),%rcx
- movups 16(%r11),%xmm1
- subq %r10,%rax
- movq %rax,%r10
-
- movdqu (%rbx),%xmm10
- movdqu (%rbp),%xmm8
-
- testq $1,%r8
- jnz .Locb_enc_odd
-
- bsfq %r8,%r12
- addq $1,%r8
- shlq $4,%r12
- movdqu (%rbx,%r12,1),%xmm7
- movdqu (%rdi),%xmm2
- leaq 16(%rdi),%rdi
-
- call __ocb_encrypt1
-
- movdqa %xmm7,%xmm15
- movups %xmm2,(%rsi)
- leaq 16(%rsi),%rsi
- subq $1,%rdx
- jz .Locb_enc_done
-
-.Locb_enc_odd:
- leaq 1(%r8),%r12
- leaq 3(%r8),%r13
- leaq 5(%r8),%r14
- leaq 6(%r8),%r8
- bsfq %r12,%r12
- bsfq %r13,%r13
- bsfq %r14,%r14
- shlq $4,%r12
- shlq $4,%r13
- shlq $4,%r14
-
- subq $6,%rdx
- jc .Locb_enc_short
- jmp .Locb_enc_grandloop
-
-.align 32
-.Locb_enc_grandloop:
- movdqu 0(%rdi),%xmm2
- movdqu 16(%rdi),%xmm3
- movdqu 32(%rdi),%xmm4
- movdqu 48(%rdi),%xmm5
- movdqu 64(%rdi),%xmm6
- movdqu 80(%rdi),%xmm7
- leaq 96(%rdi),%rdi
-
- call __ocb_encrypt6
-
- movups %xmm2,0(%rsi)
- movups %xmm3,16(%rsi)
- movups %xmm4,32(%rsi)
- movups %xmm5,48(%rsi)
- movups %xmm6,64(%rsi)
- movups %xmm7,80(%rsi)
- leaq 96(%rsi),%rsi
- subq $6,%rdx
- jnc .Locb_enc_grandloop
-
-.Locb_enc_short:
- addq $6,%rdx
- jz .Locb_enc_done
-
- movdqu 0(%rdi),%xmm2
- cmpq $2,%rdx
- jb .Locb_enc_one
- movdqu 16(%rdi),%xmm3
- je .Locb_enc_two
-
- movdqu 32(%rdi),%xmm4
- cmpq $4,%rdx
- jb .Locb_enc_three
- movdqu 48(%rdi),%xmm5
- je .Locb_enc_four
-
- movdqu 64(%rdi),%xmm6
- pxor %xmm7,%xmm7
-
- call __ocb_encrypt6
-
- movdqa %xmm14,%xmm15
- movups %xmm2,0(%rsi)
- movups %xmm3,16(%rsi)
- movups %xmm4,32(%rsi)
- movups %xmm5,48(%rsi)
- movups %xmm6,64(%rsi)
-
- jmp .Locb_enc_done
-
-.align 16
-.Locb_enc_one:
- movdqa %xmm10,%xmm7
-
- call __ocb_encrypt1
-
- movdqa %xmm7,%xmm15
- movups %xmm2,0(%rsi)
- jmp .Locb_enc_done
-
-.align 16
-.Locb_enc_two:
- pxor %xmm4,%xmm4
- pxor %xmm5,%xmm5
-
- call __ocb_encrypt4
-
- movdqa %xmm11,%xmm15
- movups %xmm2,0(%rsi)
- movups %xmm3,16(%rsi)
-
- jmp .Locb_enc_done
-
-.align 16
-.Locb_enc_three:
- pxor %xmm5,%xmm5
-
- call __ocb_encrypt4
-
- movdqa %xmm12,%xmm15
- movups %xmm2,0(%rsi)
- movups %xmm3,16(%rsi)
- movups %xmm4,32(%rsi)
-
- jmp .Locb_enc_done
-
-.align 16
-.Locb_enc_four:
- call __ocb_encrypt4
-
- movdqa %xmm13,%xmm15
- movups %xmm2,0(%rsi)
- movups %xmm3,16(%rsi)
- movups %xmm4,32(%rsi)
- movups %xmm5,48(%rsi)
-
-.Locb_enc_done:
- pxor %xmm0,%xmm15
- movdqu %xmm8,(%rbp)
- movdqu %xmm15,(%r9)
-
- xorps %xmm0,%xmm0
- pxor %xmm1,%xmm1
- pxor %xmm2,%xmm2
- pxor %xmm3,%xmm3
- pxor %xmm4,%xmm4
- pxor %xmm5,%xmm5
- pxor %xmm6,%xmm6
- pxor %xmm7,%xmm7
- pxor %xmm8,%xmm8
- pxor %xmm9,%xmm9
- pxor %xmm10,%xmm10
- pxor %xmm11,%xmm11
- pxor %xmm12,%xmm12
- pxor %xmm13,%xmm13
- pxor %xmm14,%xmm14
- pxor %xmm15,%xmm15
- leaq 40(%rsp),%rax
-.cfi_def_cfa %rax,8
- movq -40(%rax),%r14
-.cfi_restore %r14
- movq -32(%rax),%r13
-.cfi_restore %r13
- movq -24(%rax),%r12
-.cfi_restore %r12
- movq -16(%rax),%rbp
-.cfi_restore %rbp
- movq -8(%rax),%rbx
-.cfi_restore %rbx
- leaq (%rax),%rsp
-.cfi_def_cfa_register %rsp
-.Locb_enc_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size aes_hw_ocb_encrypt,.-aes_hw_ocb_encrypt
-
-.type __ocb_encrypt6,@function
-.align 32
-__ocb_encrypt6:
- pxor %xmm9,%xmm15
- movdqu (%rbx,%r12,1),%xmm11
- movdqa %xmm10,%xmm12
- movdqu (%rbx,%r13,1),%xmm13
- movdqa %xmm10,%xmm14
- pxor %xmm15,%xmm10
- movdqu (%rbx,%r14,1),%xmm15
- pxor %xmm10,%xmm11
- pxor %xmm2,%xmm8
- pxor %xmm10,%xmm2
- pxor %xmm11,%xmm12
- pxor %xmm3,%xmm8
- pxor %xmm11,%xmm3
- pxor %xmm12,%xmm13
- pxor %xmm4,%xmm8
- pxor %xmm12,%xmm4
- pxor %xmm13,%xmm14
- pxor %xmm5,%xmm8
- pxor %xmm13,%xmm5
- pxor %xmm14,%xmm15
- pxor %xmm6,%xmm8
- pxor %xmm14,%xmm6
- pxor %xmm7,%xmm8
- pxor %xmm15,%xmm7
- movups 32(%r11),%xmm0
-
- leaq 1(%r8),%r12
- leaq 3(%r8),%r13
- leaq 5(%r8),%r14
- addq $6,%r8
- pxor %xmm9,%xmm10
- bsfq %r12,%r12
- bsfq %r13,%r13
- bsfq %r14,%r14
-
-.byte 102,15,56,220,209
-.byte 102,15,56,220,217
-.byte 102,15,56,220,225
-.byte 102,15,56,220,233
- pxor %xmm9,%xmm11
- pxor %xmm9,%xmm12
-.byte 102,15,56,220,241
- pxor %xmm9,%xmm13
- pxor %xmm9,%xmm14
-.byte 102,15,56,220,249
- movups 48(%r11),%xmm1
- pxor %xmm9,%xmm15
-
-.byte 102,15,56,220,208
-.byte 102,15,56,220,216
-.byte 102,15,56,220,224
-.byte 102,15,56,220,232
-.byte 102,15,56,220,240
-.byte 102,15,56,220,248
- movups 64(%r11),%xmm0
- shlq $4,%r12
- shlq $4,%r13
- jmp .Locb_enc_loop6
-
-.align 32
-.Locb_enc_loop6:
-.byte 102,15,56,220,209
-.byte 102,15,56,220,217
-.byte 102,15,56,220,225
-.byte 102,15,56,220,233
-.byte 102,15,56,220,241
-.byte 102,15,56,220,249
- movups (%rcx,%rax,1),%xmm1
- addq $32,%rax
-
-.byte 102,15,56,220,208
-.byte 102,15,56,220,216
-.byte 102,15,56,220,224
-.byte 102,15,56,220,232
-.byte 102,15,56,220,240
-.byte 102,15,56,220,248
- movups -16(%rcx,%rax,1),%xmm0
- jnz .Locb_enc_loop6
-
-.byte 102,15,56,220,209
-.byte 102,15,56,220,217
-.byte 102,15,56,220,225
-.byte 102,15,56,220,233
-.byte 102,15,56,220,241
-.byte 102,15,56,220,249
- movups 16(%r11),%xmm1
- shlq $4,%r14
-
-.byte 102,65,15,56,221,210
- movdqu (%rbx),%xmm10
- movq %r10,%rax
-.byte 102,65,15,56,221,219
-.byte 102,65,15,56,221,228
-.byte 102,65,15,56,221,237
-.byte 102,65,15,56,221,246
-.byte 102,65,15,56,221,255
- .byte 0xf3,0xc3
-.size __ocb_encrypt6,.-__ocb_encrypt6
-
-.type __ocb_encrypt4,@function
-.align 32
-__ocb_encrypt4:
- pxor %xmm9,%xmm15
- movdqu (%rbx,%r12,1),%xmm11
- movdqa %xmm10,%xmm12
- movdqu (%rbx,%r13,1),%xmm13
- pxor %xmm15,%xmm10
- pxor %xmm10,%xmm11
- pxor %xmm2,%xmm8
- pxor %xmm10,%xmm2
- pxor %xmm11,%xmm12
- pxor %xmm3,%xmm8
- pxor %xmm11,%xmm3
- pxor %xmm12,%xmm13
- pxor %xmm4,%xmm8
- pxor %xmm12,%xmm4
- pxor %xmm5,%xmm8
- pxor %xmm13,%xmm5
- movups 32(%r11),%xmm0
-
- pxor %xmm9,%xmm10
- pxor %xmm9,%xmm11
- pxor %xmm9,%xmm12
- pxor %xmm9,%xmm13
-
-.byte 102,15,56,220,209
-.byte 102,15,56,220,217
-.byte 102,15,56,220,225
-.byte 102,15,56,220,233
- movups 48(%r11),%xmm1
-
-.byte 102,15,56,220,208
-.byte 102,15,56,220,216
-.byte 102,15,56,220,224
-.byte 102,15,56,220,232
- movups 64(%r11),%xmm0
- jmp .Locb_enc_loop4
-
-.align 32
-.Locb_enc_loop4:
-.byte 102,15,56,220,209
-.byte 102,15,56,220,217
-.byte 102,15,56,220,225
-.byte 102,15,56,220,233
- movups (%rcx,%rax,1),%xmm1
- addq $32,%rax
-
-.byte 102,15,56,220,208
-.byte 102,15,56,220,216
-.byte 102,15,56,220,224
-.byte 102,15,56,220,232
- movups -16(%rcx,%rax,1),%xmm0
- jnz .Locb_enc_loop4
-
-.byte 102,15,56,220,209
-.byte 102,15,56,220,217
-.byte 102,15,56,220,225
-.byte 102,15,56,220,233
- movups 16(%r11),%xmm1
- movq %r10,%rax
-
-.byte 102,65,15,56,221,210
-.byte 102,65,15,56,221,219
-.byte 102,65,15,56,221,228
-.byte 102,65,15,56,221,237
- .byte 0xf3,0xc3
-.size __ocb_encrypt4,.-__ocb_encrypt4
-
-.type __ocb_encrypt1,@function
-.align 32
-__ocb_encrypt1:
- pxor %xmm15,%xmm7
- pxor %xmm9,%xmm7
- pxor %xmm2,%xmm8
- pxor %xmm7,%xmm2
- movups 32(%r11),%xmm0
-
-.byte 102,15,56,220,209
- movups 48(%r11),%xmm1
- pxor %xmm9,%xmm7
-
-.byte 102,15,56,220,208
- movups 64(%r11),%xmm0
- jmp .Locb_enc_loop1
-
-.align 32
-.Locb_enc_loop1:
-.byte 102,15,56,220,209
- movups (%rcx,%rax,1),%xmm1
- addq $32,%rax
-
-.byte 102,15,56,220,208
- movups -16(%rcx,%rax,1),%xmm0
- jnz .Locb_enc_loop1
-
-.byte 102,15,56,220,209
- movups 16(%r11),%xmm1
- movq %r10,%rax
-
-.byte 102,15,56,221,215
- .byte 0xf3,0xc3
-.size __ocb_encrypt1,.-__ocb_encrypt1
-
-.globl aes_hw_ocb_decrypt
-.hidden aes_hw_ocb_decrypt
-.type aes_hw_ocb_decrypt,@function
-.align 32
-aes_hw_ocb_decrypt:
-.cfi_startproc
- leaq (%rsp),%rax
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- movq 8(%rax),%rbx
- movq 8+8(%rax),%rbp
-
- movl 240(%rcx),%r10d
- movq %rcx,%r11
- shll $4,%r10d
- movups (%rcx),%xmm9
- movups 16(%rcx,%r10,1),%xmm1
-
- movdqu (%r9),%xmm15
- pxor %xmm1,%xmm9
- pxor %xmm1,%xmm15
-
- movl $16+32,%eax
- leaq 32(%r11,%r10,1),%rcx
- movups 16(%r11),%xmm1
- subq %r10,%rax
- movq %rax,%r10
-
- movdqu (%rbx),%xmm10
- movdqu (%rbp),%xmm8
-
- testq $1,%r8
- jnz .Locb_dec_odd
-
- bsfq %r8,%r12
- addq $1,%r8
- shlq $4,%r12
- movdqu (%rbx,%r12,1),%xmm7
- movdqu (%rdi),%xmm2
- leaq 16(%rdi),%rdi
-
- call __ocb_decrypt1
-
- movdqa %xmm7,%xmm15
- movups %xmm2,(%rsi)
- xorps %xmm2,%xmm8
- leaq 16(%rsi),%rsi
- subq $1,%rdx
- jz .Locb_dec_done
-
-.Locb_dec_odd:
- leaq 1(%r8),%r12
- leaq 3(%r8),%r13
- leaq 5(%r8),%r14
- leaq 6(%r8),%r8
- bsfq %r12,%r12
- bsfq %r13,%r13
- bsfq %r14,%r14
- shlq $4,%r12
- shlq $4,%r13
- shlq $4,%r14
-
- subq $6,%rdx
- jc .Locb_dec_short
- jmp .Locb_dec_grandloop
-
-.align 32
-.Locb_dec_grandloop:
- movdqu 0(%rdi),%xmm2
- movdqu 16(%rdi),%xmm3
- movdqu 32(%rdi),%xmm4
- movdqu 48(%rdi),%xmm5
- movdqu 64(%rdi),%xmm6
- movdqu 80(%rdi),%xmm7
- leaq 96(%rdi),%rdi
-
- call __ocb_decrypt6
-
- movups %xmm2,0(%rsi)
- pxor %xmm2,%xmm8
- movups %xmm3,16(%rsi)
- pxor %xmm3,%xmm8
- movups %xmm4,32(%rsi)
- pxor %xmm4,%xmm8
- movups %xmm5,48(%rsi)
- pxor %xmm5,%xmm8
- movups %xmm6,64(%rsi)
- pxor %xmm6,%xmm8
- movups %xmm7,80(%rsi)
- pxor %xmm7,%xmm8
- leaq 96(%rsi),%rsi
- subq $6,%rdx
- jnc .Locb_dec_grandloop
-
-.Locb_dec_short:
- addq $6,%rdx
- jz .Locb_dec_done
-
- movdqu 0(%rdi),%xmm2
- cmpq $2,%rdx
- jb .Locb_dec_one
- movdqu 16(%rdi),%xmm3
- je .Locb_dec_two
-
- movdqu 32(%rdi),%xmm4
- cmpq $4,%rdx
- jb .Locb_dec_three
- movdqu 48(%rdi),%xmm5
- je .Locb_dec_four
-
- movdqu 64(%rdi),%xmm6
- pxor %xmm7,%xmm7
-
- call __ocb_decrypt6
-
- movdqa %xmm14,%xmm15
- movups %xmm2,0(%rsi)
- pxor %xmm2,%xmm8
- movups %xmm3,16(%rsi)
- pxor %xmm3,%xmm8
- movups %xmm4,32(%rsi)
- pxor %xmm4,%xmm8
- movups %xmm5,48(%rsi)
- pxor %xmm5,%xmm8
- movups %xmm6,64(%rsi)
- pxor %xmm6,%xmm8
-
- jmp .Locb_dec_done
-
-.align 16
-.Locb_dec_one:
- movdqa %xmm10,%xmm7
-
- call __ocb_decrypt1
-
- movdqa %xmm7,%xmm15
- movups %xmm2,0(%rsi)
- xorps %xmm2,%xmm8
- jmp .Locb_dec_done
-
-.align 16
-.Locb_dec_two:
- pxor %xmm4,%xmm4
- pxor %xmm5,%xmm5
-
- call __ocb_decrypt4
-
- movdqa %xmm11,%xmm15
- movups %xmm2,0(%rsi)
- xorps %xmm2,%xmm8
- movups %xmm3,16(%rsi)
- xorps %xmm3,%xmm8
-
- jmp .Locb_dec_done
-
-.align 16
-.Locb_dec_three:
- pxor %xmm5,%xmm5
-
- call __ocb_decrypt4
-
- movdqa %xmm12,%xmm15
- movups %xmm2,0(%rsi)
- xorps %xmm2,%xmm8
- movups %xmm3,16(%rsi)
- xorps %xmm3,%xmm8
- movups %xmm4,32(%rsi)
- xorps %xmm4,%xmm8
-
- jmp .Locb_dec_done
-
-.align 16
-.Locb_dec_four:
- call __ocb_decrypt4
-
- movdqa %xmm13,%xmm15
- movups %xmm2,0(%rsi)
- pxor %xmm2,%xmm8
- movups %xmm3,16(%rsi)
- pxor %xmm3,%xmm8
- movups %xmm4,32(%rsi)
- pxor %xmm4,%xmm8
- movups %xmm5,48(%rsi)
- pxor %xmm5,%xmm8
-
-.Locb_dec_done:
- pxor %xmm0,%xmm15
- movdqu %xmm8,(%rbp)
- movdqu %xmm15,(%r9)
-
- xorps %xmm0,%xmm0
- pxor %xmm1,%xmm1
- pxor %xmm2,%xmm2
- pxor %xmm3,%xmm3
- pxor %xmm4,%xmm4
- pxor %xmm5,%xmm5
- pxor %xmm6,%xmm6
- pxor %xmm7,%xmm7
- pxor %xmm8,%xmm8
- pxor %xmm9,%xmm9
- pxor %xmm10,%xmm10
- pxor %xmm11,%xmm11
- pxor %xmm12,%xmm12
- pxor %xmm13,%xmm13
- pxor %xmm14,%xmm14
- pxor %xmm15,%xmm15
- leaq 40(%rsp),%rax
-.cfi_def_cfa %rax,8
- movq -40(%rax),%r14
-.cfi_restore %r14
- movq -32(%rax),%r13
-.cfi_restore %r13
- movq -24(%rax),%r12
-.cfi_restore %r12
- movq -16(%rax),%rbp
-.cfi_restore %rbp
- movq -8(%rax),%rbx
-.cfi_restore %rbx
- leaq (%rax),%rsp
-.cfi_def_cfa_register %rsp
-.Locb_dec_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size aes_hw_ocb_decrypt,.-aes_hw_ocb_decrypt
-
-.type __ocb_decrypt6,@function
-.align 32
-__ocb_decrypt6:
- pxor %xmm9,%xmm15
- movdqu (%rbx,%r12,1),%xmm11
- movdqa %xmm10,%xmm12
- movdqu (%rbx,%r13,1),%xmm13
- movdqa %xmm10,%xmm14
- pxor %xmm15,%xmm10
- movdqu (%rbx,%r14,1),%xmm15
- pxor %xmm10,%xmm11
- pxor %xmm10,%xmm2
- pxor %xmm11,%xmm12
- pxor %xmm11,%xmm3
- pxor %xmm12,%xmm13
- pxor %xmm12,%xmm4
- pxor %xmm13,%xmm14
- pxor %xmm13,%xmm5
- pxor %xmm14,%xmm15
- pxor %xmm14,%xmm6
- pxor %xmm15,%xmm7
- movups 32(%r11),%xmm0
-
- leaq 1(%r8),%r12
- leaq 3(%r8),%r13
- leaq 5(%r8),%r14
- addq $6,%r8
- pxor %xmm9,%xmm10
- bsfq %r12,%r12
- bsfq %r13,%r13
- bsfq %r14,%r14
-
-.byte 102,15,56,222,209
-.byte 102,15,56,222,217
-.byte 102,15,56,222,225
-.byte 102,15,56,222,233
- pxor %xmm9,%xmm11
- pxor %xmm9,%xmm12
-.byte 102,15,56,222,241
- pxor %xmm9,%xmm13
- pxor %xmm9,%xmm14
-.byte 102,15,56,222,249
- movups 48(%r11),%xmm1
- pxor %xmm9,%xmm15
-
-.byte 102,15,56,222,208
-.byte 102,15,56,222,216
-.byte 102,15,56,222,224
-.byte 102,15,56,222,232
-.byte 102,15,56,222,240
-.byte 102,15,56,222,248
- movups 64(%r11),%xmm0
- shlq $4,%r12
- shlq $4,%r13
- jmp .Locb_dec_loop6
-
-.align 32
-.Locb_dec_loop6:
-.byte 102,15,56,222,209
-.byte 102,15,56,222,217
-.byte 102,15,56,222,225
-.byte 102,15,56,222,233
-.byte 102,15,56,222,241
-.byte 102,15,56,222,249
- movups (%rcx,%rax,1),%xmm1
- addq $32,%rax
-
-.byte 102,15,56,222,208
-.byte 102,15,56,222,216
-.byte 102,15,56,222,224
-.byte 102,15,56,222,232
-.byte 102,15,56,222,240
-.byte 102,15,56,222,248
- movups -16(%rcx,%rax,1),%xmm0
- jnz .Locb_dec_loop6
-
-.byte 102,15,56,222,209
-.byte 102,15,56,222,217
-.byte 102,15,56,222,225
-.byte 102,15,56,222,233
-.byte 102,15,56,222,241
-.byte 102,15,56,222,249
- movups 16(%r11),%xmm1
- shlq $4,%r14
-
-.byte 102,65,15,56,223,210
- movdqu (%rbx),%xmm10
- movq %r10,%rax
-.byte 102,65,15,56,223,219
-.byte 102,65,15,56,223,228
-.byte 102,65,15,56,223,237
-.byte 102,65,15,56,223,246
-.byte 102,65,15,56,223,255
- .byte 0xf3,0xc3
-.size __ocb_decrypt6,.-__ocb_decrypt6
-
-.type __ocb_decrypt4,@function
-.align 32
-__ocb_decrypt4:
- pxor %xmm9,%xmm15
- movdqu (%rbx,%r12,1),%xmm11
- movdqa %xmm10,%xmm12
- movdqu (%rbx,%r13,1),%xmm13
- pxor %xmm15,%xmm10
- pxor %xmm10,%xmm11
- pxor %xmm10,%xmm2
- pxor %xmm11,%xmm12
- pxor %xmm11,%xmm3
- pxor %xmm12,%xmm13
- pxor %xmm12,%xmm4
- pxor %xmm13,%xmm5
- movups 32(%r11),%xmm0
-
- pxor %xmm9,%xmm10
- pxor %xmm9,%xmm11
- pxor %xmm9,%xmm12
- pxor %xmm9,%xmm13
-
-.byte 102,15,56,222,209
-.byte 102,15,56,222,217
-.byte 102,15,56,222,225
-.byte 102,15,56,222,233
- movups 48(%r11),%xmm1
-
-.byte 102,15,56,222,208
-.byte 102,15,56,222,216
-.byte 102,15,56,222,224
-.byte 102,15,56,222,232
- movups 64(%r11),%xmm0
- jmp .Locb_dec_loop4
-
-.align 32
-.Locb_dec_loop4:
-.byte 102,15,56,222,209
-.byte 102,15,56,222,217
-.byte 102,15,56,222,225
-.byte 102,15,56,222,233
- movups (%rcx,%rax,1),%xmm1
- addq $32,%rax
-
-.byte 102,15,56,222,208
-.byte 102,15,56,222,216
-.byte 102,15,56,222,224
-.byte 102,15,56,222,232
- movups -16(%rcx,%rax,1),%xmm0
- jnz .Locb_dec_loop4
-
-.byte 102,15,56,222,209
-.byte 102,15,56,222,217
-.byte 102,15,56,222,225
-.byte 102,15,56,222,233
- movups 16(%r11),%xmm1
- movq %r10,%rax
-
-.byte 102,65,15,56,223,210
-.byte 102,65,15,56,223,219
-.byte 102,65,15,56,223,228
-.byte 102,65,15,56,223,237
- .byte 0xf3,0xc3
-.size __ocb_decrypt4,.-__ocb_decrypt4
-
-.type __ocb_decrypt1,@function
-.align 32
-__ocb_decrypt1:
- pxor %xmm15,%xmm7
- pxor %xmm9,%xmm7
- pxor %xmm7,%xmm2
- movups 32(%r11),%xmm0
-
-.byte 102,15,56,222,209
- movups 48(%r11),%xmm1
- pxor %xmm9,%xmm7
-
-.byte 102,15,56,222,208
- movups 64(%r11),%xmm0
- jmp .Locb_dec_loop1
-
-.align 32
-.Locb_dec_loop1:
-.byte 102,15,56,222,209
- movups (%rcx,%rax,1),%xmm1
- addq $32,%rax
-
-.byte 102,15,56,222,208
- movups -16(%rcx,%rax,1),%xmm0
- jnz .Locb_dec_loop1
-
-.byte 102,15,56,222,209
- movups 16(%r11),%xmm1
- movq %r10,%rax
-
-.byte 102,15,56,223,215
- .byte 0xf3,0xc3
-.size __ocb_decrypt1,.-__ocb_decrypt1
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,@function
@@ -3453,12 +1496,12 @@ aes_hw_cbc_encrypt:
xorps %xmm0,%xmm3
leaq 32(%rcx),%rcx
xorps %xmm3,%xmm2
-.Loop_enc1_15:
+.Loop_enc1_6:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
- jnz .Loop_enc1_15
+ jnz .Loop_enc1_6
.byte 102,15,56,221,209
movl %r10d,%eax
movq %r11,%rcx
@@ -3504,12 +1547,12 @@ aes_hw_cbc_encrypt:
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
-.Loop_dec1_16:
+.Loop_dec1_7:
.byte 102,15,56,222,209
decl %r10d
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
- jnz .Loop_dec1_16
+ jnz .Loop_dec1_7
.byte 102,15,56,223,209
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
@@ -3922,12 +1965,12 @@ aes_hw_cbc_encrypt:
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
-.Loop_dec1_17:
+.Loop_dec1_8:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
- jnz .Loop_dec1_17
+ jnz .Loop_dec1_8
.byte 102,15,56,223,209
xorps %xmm10,%xmm2
movaps %xmm11,%xmm10
@@ -4068,6 +2111,11 @@ aes_hw_set_decrypt_key:
aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:
.cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+ movb $1,BORINGSSL_function_hit+3(%rip)
+#endif
+#endif
.byte 0x48,0x83,0xEC,0x08
.cfi_adjust_cfa_offset 8
movq $-1,%rax
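
Besides the instrumentation, aesni-x86_64.S loses its CCM64, XTS, and OCB entry points (aes_hw_ccm64_*, aes_hw_xts_*, aes_hw_ocb_*) as dead code — the bulk of the 2036 lines deleted in this sync. Because the generating Perl script numbers its single-block loops sequentially across the whole file, deleting those functions shifts the counter, which is why otherwise-untouched code shows pairs like:

-.Loop_enc1_7:
+.Loop_enc1_5:

The renames carry no behavioral change; only the auto-generated label suffix moved.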
diff --git a/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S b/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S
index 36c01ef9..5236aa66 100644
--- a/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S
+++ b/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S
@@ -21,6 +21,7 @@
.type _bsaes_encrypt8,@function
.align 64
_bsaes_encrypt8:
+.cfi_startproc
leaq .LBS0(%rip),%r11
movdqa (%rax),%xmm8
@@ -488,11 +489,13 @@ _bsaes_encrypt8_bitslice:
pxor %xmm7,%xmm15
pxor %xmm7,%xmm0
.byte 0xf3,0xc3
+.cfi_endproc
.size _bsaes_encrypt8,.-_bsaes_encrypt8
.type _bsaes_decrypt8,@function
.align 64
_bsaes_decrypt8:
+.cfi_startproc
leaq .LBS0(%rip),%r11
movdqa (%rax),%xmm8
@@ -994,10 +997,12 @@ _bsaes_decrypt8:
pxor %xmm7,%xmm15
pxor %xmm7,%xmm0
.byte 0xf3,0xc3
+.cfi_endproc
.size _bsaes_decrypt8,.-_bsaes_decrypt8
.type _bsaes_key_convert,@function
.align 16
_bsaes_key_convert:
+.cfi_startproc
leaq .Lmasks(%rip),%r11
movdqu (%rcx),%xmm7
leaq 16(%rcx),%rcx
@@ -1076,6 +1081,7 @@ _bsaes_key_convert:
movdqa 80(%r11),%xmm7
.byte 0xf3,0xc3
+.cfi_endproc
.size _bsaes_key_convert,.-_bsaes_key_convert
.extern aes_nohw_cbc_encrypt
.hidden aes_nohw_cbc_encrypt
@@ -1359,6 +1365,13 @@ bsaes_cbc_encrypt:
.align 16
bsaes_ctr32_encrypt_blocks:
.cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+.extern BORINGSSL_function_hit
+.hidden BORINGSSL_function_hit
+ movb $1,BORINGSSL_function_hit+6(%rip)
+#endif
+#endif
movq %rsp,%rax
.Lctr_enc_prologue:
pushq %rbp
diff --git a/linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S b/linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
new file mode 100644
index 00000000..ecf5b66f
--- /dev/null
+++ b/linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
@@ -0,0 +1,426 @@
+# This file is generated from a similarly-named Perl script in the BoringSSL
+# source tree. Do not edit by hand.
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+#endif
+
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+.text
+
+
+
+
+
+.type gcm_gmult_ssse3, @function
+.globl gcm_gmult_ssse3
+.hidden gcm_gmult_ssse3
+.align 16
+gcm_gmult_ssse3:
+.cfi_startproc
+.Lgmult_seh_begin:
+ movdqu (%rdi),%xmm0
+ movdqa .Lreverse_bytes(%rip),%xmm10
+ movdqa .Llow4_mask(%rip),%xmm2
+
+
+.byte 102,65,15,56,0,194
+
+
+ movdqa %xmm2,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm2,%xmm0
+
+
+
+
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ movq $5,%rax
+.Loop_row_1:
+ movdqa (%rsi),%xmm4
+ leaq 16(%rsi),%rsi
+
+
+ movdqa %xmm2,%xmm6
+.byte 102,15,58,15,243,1
+ movdqa %xmm6,%xmm3
+ psrldq $1,%xmm2
+
+
+
+
+ movdqa %xmm4,%xmm5
+.byte 102,15,56,0,224
+.byte 102,15,56,0,233
+
+
+ pxor %xmm5,%xmm2
+
+
+
+ movdqa %xmm4,%xmm5
+ psllq $60,%xmm5
+ movdqa %xmm5,%xmm6
+ pslldq $8,%xmm6
+ pxor %xmm6,%xmm3
+
+
+ psrldq $8,%xmm5
+ pxor %xmm5,%xmm2
+ psrlq $4,%xmm4
+ pxor %xmm4,%xmm2
+
+ subq $1,%rax
+ jnz .Loop_row_1
+
+
+
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $5,%xmm3
+ pxor %xmm3,%xmm2
+ pxor %xmm3,%xmm3
+ movq $5,%rax
+.Loop_row_2:
+ movdqa (%rsi),%xmm4
+ leaq 16(%rsi),%rsi
+
+
+ movdqa %xmm2,%xmm6
+.byte 102,15,58,15,243,1
+ movdqa %xmm6,%xmm3
+ psrldq $1,%xmm2
+
+
+
+
+ movdqa %xmm4,%xmm5
+.byte 102,15,56,0,224
+.byte 102,15,56,0,233
+
+
+ pxor %xmm5,%xmm2
+
+
+
+ movdqa %xmm4,%xmm5
+ psllq $60,%xmm5
+ movdqa %xmm5,%xmm6
+ pslldq $8,%xmm6
+ pxor %xmm6,%xmm3
+
+
+ psrldq $8,%xmm5
+ pxor %xmm5,%xmm2
+ psrlq $4,%xmm4
+ pxor %xmm4,%xmm2
+
+ subq $1,%rax
+ jnz .Loop_row_2
+
+
+
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $5,%xmm3
+ pxor %xmm3,%xmm2
+ pxor %xmm3,%xmm3
+ movq $6,%rax
+.Loop_row_3:
+ movdqa (%rsi),%xmm4
+ leaq 16(%rsi),%rsi
+
+
+ movdqa %xmm2,%xmm6
+.byte 102,15,58,15,243,1
+ movdqa %xmm6,%xmm3
+ psrldq $1,%xmm2
+
+
+
+
+ movdqa %xmm4,%xmm5
+.byte 102,15,56,0,224
+.byte 102,15,56,0,233
+
+
+ pxor %xmm5,%xmm2
+
+
+
+ movdqa %xmm4,%xmm5
+ psllq $60,%xmm5
+ movdqa %xmm5,%xmm6
+ pslldq $8,%xmm6
+ pxor %xmm6,%xmm3
+
+
+ psrldq $8,%xmm5
+ pxor %xmm5,%xmm2
+ psrlq $4,%xmm4
+ pxor %xmm4,%xmm2
+
+ subq $1,%rax
+ jnz .Loop_row_3
+
+
+
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $5,%xmm3
+ pxor %xmm3,%xmm2
+ pxor %xmm3,%xmm3
+
+.byte 102,65,15,56,0,210
+ movdqu %xmm2,(%rdi)
+
+
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ .byte 0xf3,0xc3
+.Lgmult_seh_end:
+.cfi_endproc
+.size gcm_gmult_ssse3,.-gcm_gmult_ssse3
+
+
+
+
+
+.type gcm_ghash_ssse3, @function
+.globl gcm_ghash_ssse3
+.hidden gcm_ghash_ssse3
+.align 16
+gcm_ghash_ssse3:
+.Lghash_seh_begin:
+.cfi_startproc
+ movdqu (%rdi),%xmm0
+ movdqa .Lreverse_bytes(%rip),%xmm10
+ movdqa .Llow4_mask(%rip),%xmm11
+
+
+ andq $-16,%rcx
+
+
+
+.byte 102,65,15,56,0,194
+
+
+ pxor %xmm3,%xmm3
+.Loop_ghash:
+
+ movdqu (%rdx),%xmm1
+.byte 102,65,15,56,0,202
+ pxor %xmm1,%xmm0
+
+
+ movdqa %xmm11,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm11,%xmm0
+
+
+
+
+ pxor %xmm2,%xmm2
+
+ movq $5,%rax
+.Loop_row_4:
+ movdqa (%rsi),%xmm4
+ leaq 16(%rsi),%rsi
+
+
+ movdqa %xmm2,%xmm6
+.byte 102,15,58,15,243,1
+ movdqa %xmm6,%xmm3
+ psrldq $1,%xmm2
+
+
+
+
+ movdqa %xmm4,%xmm5
+.byte 102,15,56,0,224
+.byte 102,15,56,0,233
+
+
+ pxor %xmm5,%xmm2
+
+
+
+ movdqa %xmm4,%xmm5
+ psllq $60,%xmm5
+ movdqa %xmm5,%xmm6
+ pslldq $8,%xmm6
+ pxor %xmm6,%xmm3
+
+
+ psrldq $8,%xmm5
+ pxor %xmm5,%xmm2
+ psrlq $4,%xmm4
+ pxor %xmm4,%xmm2
+
+ subq $1,%rax
+ jnz .Loop_row_4
+
+
+
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $5,%xmm3
+ pxor %xmm3,%xmm2
+ pxor %xmm3,%xmm3
+ movq $5,%rax
+.Loop_row_5:
+ movdqa (%rsi),%xmm4
+ leaq 16(%rsi),%rsi
+
+
+ movdqa %xmm2,%xmm6
+.byte 102,15,58,15,243,1
+ movdqa %xmm6,%xmm3
+ psrldq $1,%xmm2
+
+
+
+
+ movdqa %xmm4,%xmm5
+.byte 102,15,56,0,224
+.byte 102,15,56,0,233
+
+
+ pxor %xmm5,%xmm2
+
+
+
+ movdqa %xmm4,%xmm5
+ psllq $60,%xmm5
+ movdqa %xmm5,%xmm6
+ pslldq $8,%xmm6
+ pxor %xmm6,%xmm3
+
+
+ psrldq $8,%xmm5
+ pxor %xmm5,%xmm2
+ psrlq $4,%xmm4
+ pxor %xmm4,%xmm2
+
+ subq $1,%rax
+ jnz .Loop_row_5
+
+
+
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $5,%xmm3
+ pxor %xmm3,%xmm2
+ pxor %xmm3,%xmm3
+ movq $6,%rax
+.Loop_row_6:
+ movdqa (%rsi),%xmm4
+ leaq 16(%rsi),%rsi
+
+
+ movdqa %xmm2,%xmm6
+.byte 102,15,58,15,243,1
+ movdqa %xmm6,%xmm3
+ psrldq $1,%xmm2
+
+
+
+
+ movdqa %xmm4,%xmm5
+.byte 102,15,56,0,224
+.byte 102,15,56,0,233
+
+
+ pxor %xmm5,%xmm2
+
+
+
+ movdqa %xmm4,%xmm5
+ psllq $60,%xmm5
+ movdqa %xmm5,%xmm6
+ pslldq $8,%xmm6
+ pxor %xmm6,%xmm3
+
+
+ psrldq $8,%xmm5
+ pxor %xmm5,%xmm2
+ psrlq $4,%xmm4
+ pxor %xmm4,%xmm2
+
+ subq $1,%rax
+ jnz .Loop_row_6
+
+
+
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $1,%xmm3
+ pxor %xmm3,%xmm2
+ psrlq $5,%xmm3
+ pxor %xmm3,%xmm2
+ pxor %xmm3,%xmm3
+ movdqa %xmm2,%xmm0
+
+
+ leaq -256(%rsi),%rsi
+
+
+ leaq 16(%rdx),%rdx
+ subq $16,%rcx
+ jnz .Loop_ghash
+
+
+.byte 102,65,15,56,0,194
+ movdqu %xmm0,(%rdi)
+
+
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ .byte 0xf3,0xc3
+.Lghash_seh_end:
+.cfi_endproc
+.size gcm_ghash_ssse3,.-gcm_ghash_ssse3
+
+.align 16
+
+
+.Lreverse_bytes:
+.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+.Llow4_mask:
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+#endif
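The new ghash-ssse3-x86_64.S implements GHASH for SSSE3-only CPUs with a 16-row precomputed table: each block walks the rows in three passes of 5, 5, and 6, then rewinds %rsi by 256 bytes so every block reads the full table at fixed offsets, and the pshufb nibble lookups (the .byte 102,... sequences) keep the access pattern independent of secret data. A hedged sketch of the C-side contract, assuming the prototypes match BoringSSL's internal GHASH interface; the u128 shape below is an assumption taken from that interface:

#include <stddef.h>
#include <stdint.h>

typedef struct { uint64_t hi, lo; } u128;

/* Xi is the 16-byte GHASH accumulator; Htable is the 16-row (256-byte)
 * table precomputed from the hash key H. */
extern void gcm_gmult_ssse3(uint64_t Xi[2], const u128 Htable[16]);
extern void gcm_ghash_ssse3(uint64_t Xi[2], const u128 Htable[16],
                            const uint8_t *inp, size_t len);

/* Hypothetical driver: the assembly masks len with ~15, so only whole
 * 16-byte blocks are absorbed. */
static void ghash_absorb(uint64_t Xi[2], const u128 Htable[16],
                         const uint8_t *buf, size_t len) {
  gcm_ghash_ssse3(Xi, Htable, buf, len & ~(size_t)15);
}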
diff --git a/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S b/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S
index ed0946d9..0b36afac 100644
--- a/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S
+++ b/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S
@@ -722,6 +722,7 @@ gcm_ghash_4bit:
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
+.cfi_startproc
.L_init_clmul:
movdqu (%rsi),%xmm2
pshufd $78,%xmm2,%xmm2
@@ -873,12 +874,14 @@ gcm_init_clmul:
.byte 102,15,58,15,227,8
movdqu %xmm4,80(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size gcm_init_clmul,.-gcm_init_clmul
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
+.cfi_startproc
.L_gmult_clmul:
movdqu (%rdi),%xmm0
movdqa .Lbswap_mask(%rip),%xmm5
@@ -925,12 +928,14 @@ gcm_gmult_clmul:
.byte 102,15,56,0,197
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 32
gcm_ghash_clmul:
+.cfi_startproc
.L_ghash_clmul:
movdqa .Lbswap_mask(%rip),%xmm10
@@ -1310,12 +1315,14 @@ gcm_ghash_clmul:
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size gcm_ghash_clmul,.-gcm_ghash_clmul
.globl gcm_init_avx
.hidden gcm_init_avx
.type gcm_init_avx,@function
.align 32
gcm_init_avx:
+.cfi_startproc
vzeroupper
vmovdqu (%rsi),%xmm2
@@ -1418,19 +1425,23 @@ gcm_init_avx:
vzeroupper
.byte 0xf3,0xc3
+.cfi_endproc
.size gcm_init_avx,.-gcm_init_avx
.globl gcm_gmult_avx
.hidden gcm_gmult_avx
.type gcm_gmult_avx,@function
.align 32
gcm_gmult_avx:
+.cfi_startproc
jmp .L_gmult_clmul
+.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
.globl gcm_ghash_avx
.hidden gcm_ghash_avx
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
+.cfi_startproc
vzeroupper
vmovdqu (%rdi),%xmm10
@@ -1802,6 +1813,7 @@ gcm_ghash_avx:
vmovdqu %xmm10,(%rdi)
vzeroupper
.byte 0xf3,0xc3
+.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
.align 64
.Lbswap_mask:
diff --git a/linux-x86_64/crypto/fipsmodule/md5-x86_64.S b/linux-x86_64/crypto/fipsmodule/md5-x86_64.S
index 6d08f173..18e2e928 100644
--- a/linux-x86_64/crypto/fipsmodule/md5-x86_64.S
+++ b/linux-x86_64/crypto/fipsmodule/md5-x86_64.S
@@ -18,11 +18,22 @@
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
md5_block_asm_data_order:
+.cfi_startproc
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset r12,-32
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset r14,-40
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset r15,-48
.Lprologue:
@@ -672,12 +683,19 @@ md5_block_asm_data_order:
movl %edx,12(%rbp)
movq (%rsp),%r15
+.cfi_restore r15
movq 8(%rsp),%r14
+.cfi_restore r14
movq 16(%rsp),%r12
+.cfi_restore r12
movq 24(%rsp),%rbx
+.cfi_restore rbx
movq 32(%rsp),%rbp
+.cfi_restore rbp
addq $40,%rsp
+.cfi_adjust_cfa_offset -40
.Lepilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size md5_block_asm_data_order,.-md5_block_asm_data_order
#endif
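The md5-x86_64.S change only annotates the existing prologue pushes and the 40-byte epilogue restore with CFI; the computation is untouched. For orientation, a hedged sketch of the calling contract, assuming the prototype used by BoringSSL's md5.c:

#include <stddef.h>
#include <stdint.h>

/* state holds the four MD5 chaining words {A, B, C, D}; data must contain
 * num complete 64-byte blocks. */
extern void md5_block_asm_data_order(uint32_t *state, const uint8_t *data,
                                     size_t num);

/* Hypothetical wrapper: compress only the whole blocks of a buffer. */
static void md5_compress_blocks(uint32_t state[4], const uint8_t *data,
                                size_t len) {
  md5_block_asm_data_order(state, data, len / 64);
}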
diff --git a/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S b/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
index 6a3cb1c6..3a575228 100644
--- a/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
+++ b/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
@@ -1267,6 +1267,7 @@ ecp_nistz256_mul_mont:
.type __ecp_nistz256_mul_montq,@function
.align 32
__ecp_nistz256_mul_montq:
+.cfi_startproc
movq %rax,%rbp
@@ -1478,6 +1479,7 @@ __ecp_nistz256_mul_montq:
movq %r9,24(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
@@ -1557,6 +1559,7 @@ ecp_nistz256_sqr_mont:
.type __ecp_nistz256_sqr_montq,@function
.align 32
__ecp_nistz256_sqr_montq:
+.cfi_startproc
movq %rax,%r13
mulq %r14
movq %rax,%r9
@@ -1714,10 +1717,12 @@ __ecp_nistz256_sqr_montq:
movq %r15,24(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
.type __ecp_nistz256_mul_montx,@function
.align 32
__ecp_nistz256_mul_montx:
+.cfi_startproc
mulxq %r9,%r8,%r9
@@ -1880,11 +1885,13 @@ __ecp_nistz256_mul_montx:
movq %r9,24(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
.type __ecp_nistz256_sqr_montx,@function
.align 32
__ecp_nistz256_sqr_montx:
+.cfi_startproc
mulxq %r14,%r9,%r10
mulxq %r15,%rcx,%r11
xorl %eax,%eax
@@ -2008,6 +2015,7 @@ __ecp_nistz256_sqr_montx:
movq %r15,24(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
@@ -2016,6 +2024,7 @@ __ecp_nistz256_sqr_montx:
.type ecp_nistz256_select_w5,@function
.align 32
ecp_nistz256_select_w5:
+.cfi_startproc
leaq OPENSSL_ia32cap_P(%rip),%rax
movq 8(%rax),%rax
testl $32,%eax
@@ -2071,6 +2080,7 @@ ecp_nistz256_select_w5:
movdqu %xmm6,64(%rdi)
movdqu %xmm7,80(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.LSEH_end_ecp_nistz256_select_w5:
.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
@@ -2081,6 +2091,7 @@ ecp_nistz256_select_w5:
.type ecp_nistz256_select_w7,@function
.align 32
ecp_nistz256_select_w7:
+.cfi_startproc
leaq OPENSSL_ia32cap_P(%rip),%rax
movq 8(%rax),%rax
testl $32,%eax
@@ -2125,6 +2136,7 @@ ecp_nistz256_select_w7:
movdqu %xmm4,32(%rdi)
movdqu %xmm5,48(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.LSEH_end_ecp_nistz256_select_w7:
.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
@@ -2132,6 +2144,7 @@ ecp_nistz256_select_w7:
.type ecp_nistz256_avx2_select_w5,@function
.align 32
ecp_nistz256_avx2_select_w5:
+.cfi_startproc
.Lavx2_select_w5:
vzeroupper
vmovdqa .LTwo(%rip),%ymm0
@@ -2186,6 +2199,7 @@ ecp_nistz256_avx2_select_w5:
vmovdqu %ymm4,64(%rdi)
vzeroupper
.byte 0xf3,0xc3
+.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w5:
.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
@@ -2196,6 +2210,7 @@ ecp_nistz256_avx2_select_w5:
.type ecp_nistz256_avx2_select_w7,@function
.align 32
ecp_nistz256_avx2_select_w7:
+.cfi_startproc
.Lavx2_select_w7:
vzeroupper
vmovdqa .LThree(%rip),%ymm0
@@ -2265,11 +2280,13 @@ ecp_nistz256_avx2_select_w7:
vmovdqu %ymm3,32(%rdi)
vzeroupper
.byte 0xf3,0xc3
+.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w7:
.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
.type __ecp_nistz256_add_toq,@function
.align 32
__ecp_nistz256_add_toq:
+.cfi_startproc
xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
@@ -2297,11 +2314,13 @@ __ecp_nistz256_add_toq:
movq %r9,24(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
.type __ecp_nistz256_sub_fromq,@function
.align 32
__ecp_nistz256_sub_fromq:
+.cfi_startproc
subq 0(%rbx),%r12
sbbq 8(%rbx),%r13
movq %r12,%rax
@@ -2328,11 +2347,13 @@ __ecp_nistz256_sub_fromq:
movq %r9,24(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
.type __ecp_nistz256_subq,@function
.align 32
__ecp_nistz256_subq:
+.cfi_startproc
subq %r12,%rax
sbbq %r13,%rbp
movq %rax,%r12
@@ -2355,11 +2376,13 @@ __ecp_nistz256_subq:
cmovnzq %r10,%r9
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_subq,.-__ecp_nistz256_subq
.type __ecp_nistz256_mul_by_2q,@function
.align 32
__ecp_nistz256_mul_by_2q:
+.cfi_startproc
xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
@@ -2387,6 +2410,7 @@ __ecp_nistz256_mul_by_2q:
movq %r9,24(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
.globl ecp_nistz256_point_double
.hidden ecp_nistz256_point_double
@@ -2823,7 +2847,9 @@ ecp_nistz256_point_add:
.byte 102,72,15,126,206
.byte 102,72,15,126,199
addq $416,%rsp
+.cfi_adjust_cfa_offset -416
jmp .Lpoint_double_shortcutq
+.cfi_adjust_cfa_offset 416
.align 32
.Ladd_proceedq:
@@ -3387,6 +3413,7 @@ ecp_nistz256_point_add_affine:
.type __ecp_nistz256_add_tox,@function
.align 32
__ecp_nistz256_add_tox:
+.cfi_startproc
xorq %r11,%r11
adcq 0(%rbx),%r12
adcq 8(%rbx),%r13
@@ -3415,11 +3442,13 @@ __ecp_nistz256_add_tox:
movq %r9,24(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
.type __ecp_nistz256_sub_fromx,@function
.align 32
__ecp_nistz256_sub_fromx:
+.cfi_startproc
xorq %r11,%r11
sbbq 0(%rbx),%r12
sbbq 8(%rbx),%r13
@@ -3448,11 +3477,13 @@ __ecp_nistz256_sub_fromx:
movq %r9,24(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
.type __ecp_nistz256_subx,@function
.align 32
__ecp_nistz256_subx:
+.cfi_startproc
xorq %r11,%r11
sbbq %r12,%rax
sbbq %r13,%rbp
@@ -3477,11 +3508,13 @@ __ecp_nistz256_subx:
cmovcq %r10,%r9
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_subx,.-__ecp_nistz256_subx
.type __ecp_nistz256_mul_by_2x,@function
.align 32
__ecp_nistz256_mul_by_2x:
+.cfi_startproc
xorq %r11,%r11
adcq %r12,%r12
adcq %r13,%r13
@@ -3510,6 +3543,7 @@ __ecp_nistz256_mul_by_2x:
movq %r9,24(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
.type ecp_nistz256_point_doublex,@function
.align 32
@@ -3934,7 +3968,9 @@ ecp_nistz256_point_addx:
.byte 102,72,15,126,206
.byte 102,72,15,126,199
addq $416,%rsp
+.cfi_adjust_cfa_offset -416
jmp .Lpoint_double_shortcutx
+.cfi_adjust_cfa_offset 416
.align 32
.Ladd_proceedx:
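The p256-x86_64-asm.S changes pair every internal helper with .cfi_startproc/.cfi_endproc and bracket the early `jmp .Lpoint_double_shortcutq`/`.Lpoint_double_shortcutx` with offsetting .cfi_adjust_cfa_offset 416 annotations, so the CFA is correct both along the jump (taken after the 416-byte frame is released) and on the fall-through path. Among the routines gaining annotations are the constant-time table selects; a hedged sketch of their contracts, assuming the point layouts from BoringSSL's p256-x86_64 interface:

#include <stdint.h>

/* Jacobian and affine P-256 points, 4 little-endian 64-bit limbs per word. */
typedef struct { uint64_t X[4], Y[4], Z[4]; } P256_POINT;
typedef struct { uint64_t X[4], Y[4]; } P256_POINT_AFFINE;

/* Constant-time window lookups: scan the whole table regardless of index,
 * copying entry index-1 into val (index 0 selects zero). */
extern void ecp_nistz256_select_w5(P256_POINT *val,
                                   const P256_POINT in_t[16], int index);
extern void ecp_nistz256_select_w7(P256_POINT_AFFINE *val,
                                   const P256_POINT_AFFINE in_t[64],
                                   int index);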
diff --git a/linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S b/linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
index 98a2f8f4..5dfecc85 100644
--- a/linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
+++ b/linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
@@ -23,23 +23,27 @@ beeu_mod_inverse_vartime:
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp,-16
- movq %rsp,%rbp
-.cfi_def_cfa_register rbp
-
pushq %r12
+.cfi_adjust_cfa_offset 8
.cfi_offset r12,-24
pushq %r13
+.cfi_adjust_cfa_offset 8
.cfi_offset r13,-32
pushq %r14
+.cfi_adjust_cfa_offset 8
.cfi_offset r14,-40
pushq %r15
+.cfi_adjust_cfa_offset 8
.cfi_offset r15,-48
pushq %rbx
+.cfi_adjust_cfa_offset 8
.cfi_offset rbx,-56
pushq %rsi
+.cfi_adjust_cfa_offset 8
.cfi_offset rsi,-64
subq $80,%rsp
+.cfi_adjust_cfa_offset 80
movq %rdi,0(%rsp)
@@ -309,23 +313,30 @@ beeu_mod_inverse_vartime:
.Lbeeu_finish:
addq $80,%rsp
+.cfi_adjust_cfa_offset -80
popq %rsi
+.cfi_adjust_cfa_offset -8
.cfi_restore rsi
popq %rbx
+.cfi_adjust_cfa_offset -8
.cfi_restore rbx
popq %r15
+.cfi_adjust_cfa_offset -8
.cfi_restore r15
popq %r14
+.cfi_adjust_cfa_offset -8
.cfi_restore r14
popq %r13
+.cfi_adjust_cfa_offset -8
.cfi_restore r13
popq %r12
+.cfi_adjust_cfa_offset -8
.cfi_restore r12
popq %rbp
+.cfi_adjust_cfa_offset -8
.cfi_restore rbp
-.cfi_def_cfa rsp, 8
-.cfi_endproc
.byte 0xf3,0xc3
+.cfi_endproc
.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
#endif
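In p256_beeu-x86_64-asm.S the frame-pointer-based CFA (the dropped `movq %rsp,%rbp` and `.cfi_def_cfa_register rbp`) is replaced by explicit .cfi_adjust_cfa_offset bookkeeping on every push, pop, and stack adjustment, and .cfi_endproc moves after the final ret so the whole body is covered. For orientation, a hedged sketch of the routine's contract, assuming the prototype from BoringSSL's P-256 code:

#include <stdint.h>

/* Variable-time binary extended Euclid: out = a^-1 mod n, with 4x64-bit
 * little-endian limbs. Returns 1 on success, 0 if no inverse exists. */
extern int beeu_mod_inverse_vartime(uint64_t out[4], const uint64_t a[4],
                                    const uint64_t n[4]);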
diff --git a/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S b/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S
index d7b0cb4b..fefccd6f 100644
--- a/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S
+++ b/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S
@@ -23,14 +23,13 @@
CRYPTO_rdrand:
.cfi_startproc
xorq %rax,%rax
-
-
-.byte 0x48, 0x0f, 0xc7, 0xf1
+.byte 72,15,199,242
adcq %rax,%rax
- movq %rcx,0(%rdi)
+ movq %rdx,0(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
+.size CRYPTO_rdrand,.-CRYPTO_rdrand
@@ -46,9 +45,7 @@ CRYPTO_rdrand_multiple8_buf:
jz .Lout
movq $8,%rdx
.Lloop:
-
-
-.byte 0x48, 0x0f, 0xc7, 0xf1
+.byte 72,15,199,241
jnc .Lerr
movq %rcx,0(%rdi)
addq %rdx,%rdi
@@ -61,4 +58,5 @@ CRYPTO_rdrand_multiple8_buf:
xorq %rax,%rax
.byte 0xf3,0xc3
.cfi_endproc
+.size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf
#endif
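The rdrand-x86_64.S change swaps the hand-encoded `rdrand %rcx` (0x48,0x0f,0xc7,0xf1) in CRYPTO_rdrand for `rdrand %rdx` (72,15,199,242), updates the following store to match, and adds the two missing .size directives. A hedged sketch of the C-side contracts, assuming the prototypes in BoringSSL's rand internals; both functions return 1 on success and 0 when RDRAND reports failure:

#include <stddef.h>
#include <stdint.h>

extern int CRYPTO_rdrand(uint8_t out[8]);
extern int CRYPTO_rdrand_multiple8_buf(uint8_t *buf, size_t len);

/* Hypothetical wrapper: fill whole 8-byte chunks via the bulk routine. */
static int fill_random(uint8_t *buf, size_t len) {
  return CRYPTO_rdrand_multiple8_buf(buf, len & ~(size_t)7);
}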
diff --git a/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S b/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S
index 4ca2cab0..579c7055 100644
--- a/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S
+++ b/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S
@@ -1228,6 +1228,7 @@ rsaz_1024_mul_avx2:
.type rsaz_1024_red2norm_avx2,@function
.align 32
rsaz_1024_red2norm_avx2:
+.cfi_startproc
subq $-128,%rsi
xorq %rax,%rax
movq -128(%rsi),%r8
@@ -1419,6 +1420,7 @@ rsaz_1024_red2norm_avx2:
movq %rax,120(%rdi)
movq %r11,%rax
.byte 0xf3,0xc3
+.cfi_endproc
.size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2
.globl rsaz_1024_norm2red_avx2
@@ -1426,6 +1428,7 @@ rsaz_1024_red2norm_avx2:
.type rsaz_1024_norm2red_avx2,@function
.align 32
rsaz_1024_norm2red_avx2:
+.cfi_startproc
subq $-128,%rdi
movq (%rsi),%r8
movl $0x1fffffff,%eax
@@ -1578,12 +1581,14 @@ rsaz_1024_norm2red_avx2:
movq %r8,176(%rdi)
movq %r8,184(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2
.globl rsaz_1024_scatter5_avx2
.hidden rsaz_1024_scatter5_avx2
.type rsaz_1024_scatter5_avx2,@function
.align 32
rsaz_1024_scatter5_avx2:
+.cfi_startproc
vzeroupper
vmovdqu .Lscatter_permd(%rip),%ymm5
shll $4,%edx
@@ -1603,6 +1608,7 @@ rsaz_1024_scatter5_avx2:
vzeroupper
.byte 0xf3,0xc3
+.cfi_endproc
.size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2
.globl rsaz_1024_gather5_avx2
@@ -1727,25 +1733,6 @@ rsaz_1024_gather5_avx2:
.cfi_endproc
.LSEH_end_rsaz_1024_gather5:
.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
-.extern OPENSSL_ia32cap_P
-.hidden OPENSSL_ia32cap_P
-.globl rsaz_avx2_eligible
-.hidden rsaz_avx2_eligible
-.type rsaz_avx2_eligible,@function
-.align 32
-rsaz_avx2_eligible:
- leaq OPENSSL_ia32cap_P(%rip),%rax
- movl 8(%rax),%eax
- movl $524544,%ecx
- movl $0,%edx
- andl %eax,%ecx
- cmpl $524544,%ecx
- cmovel %edx,%eax
- andl $32,%eax
- shrl $5,%eax
- .byte 0xf3,0xc3
-.size rsaz_avx2_eligible,.-rsaz_avx2_eligible
-
.align 64
.Land_mask:
.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
diff --git a/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S b/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
index 41b113a3..4355438e 100644
--- a/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
+++ b/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
@@ -31,6 +31,7 @@
.type _vpaes_encrypt_core,@function
.align 16
_vpaes_encrypt_core:
+.cfi_startproc
movq %rdx,%r9
movq $16,%r11
movl 240(%rdx),%eax
@@ -111,6 +112,7 @@ _vpaes_encrypt_core:
pxor %xmm4,%xmm0
.byte 102,15,56,0,193
.byte 0xf3,0xc3
+.cfi_endproc
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
@@ -121,6 +123,7 @@ _vpaes_encrypt_core:
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
+.cfi_startproc
movq %rdx,%r9
movl 240(%rdx),%eax
movdqa %xmm9,%xmm1
@@ -217,6 +220,7 @@ _vpaes_decrypt_core:
pxor %xmm4,%xmm0
.byte 102,15,56,0,194
.byte 0xf3,0xc3
+.cfi_endproc
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
@@ -227,6 +231,7 @@ _vpaes_decrypt_core:
.type _vpaes_schedule_core,@function
.align 16
_vpaes_schedule_core:
+.cfi_startproc
@@ -393,6 +398,7 @@ _vpaes_schedule_core:
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
.byte 0xf3,0xc3
+.cfi_endproc
.size _vpaes_schedule_core,.-_vpaes_schedule_core
@@ -412,6 +418,7 @@ _vpaes_schedule_core:
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
+.cfi_startproc
pshufd $0x80,%xmm6,%xmm1
pshufd $0xFE,%xmm7,%xmm0
pxor %xmm1,%xmm6
@@ -420,6 +427,7 @@ _vpaes_schedule_192_smear:
movdqa %xmm6,%xmm0
movhlps %xmm1,%xmm6
.byte 0xf3,0xc3
+.cfi_endproc
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
@@ -443,6 +451,7 @@ _vpaes_schedule_192_smear:
.type _vpaes_schedule_round,@function
.align 16
_vpaes_schedule_round:
+.cfi_startproc
pxor %xmm1,%xmm1
.byte 102,65,15,58,15,200,15
@@ -496,6 +505,7 @@ _vpaes_schedule_low_round:
pxor %xmm7,%xmm0
movdqa %xmm0,%xmm7
.byte 0xf3,0xc3
+.cfi_endproc
.size _vpaes_schedule_round,.-_vpaes_schedule_round
@@ -510,6 +520,7 @@ _vpaes_schedule_low_round:
.type _vpaes_schedule_transform,@function
.align 16
_vpaes_schedule_transform:
+.cfi_startproc
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
@@ -520,6 +531,7 @@ _vpaes_schedule_transform:
.byte 102,15,56,0,193
pxor %xmm2,%xmm0
.byte 0xf3,0xc3
+.cfi_endproc
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
@@ -548,6 +560,7 @@ _vpaes_schedule_transform:
.type _vpaes_schedule_mangle,@function
.align 16
_vpaes_schedule_mangle:
+.cfi_startproc
movdqa %xmm0,%xmm4
movdqa .Lk_mc_forward(%rip),%xmm5
testq %rcx,%rcx
@@ -612,6 +625,7 @@ _vpaes_schedule_mangle:
andq $0x30,%r8
movdqu %xmm3,(%rdx)
.byte 0xf3,0xc3
+.cfi_endproc
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
@@ -622,6 +636,15 @@ _vpaes_schedule_mangle:
.type vpaes_set_encrypt_key,@function
.align 16
vpaes_set_encrypt_key:
+.cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+.extern BORINGSSL_function_hit
+.hidden BORINGSSL_function_hit
+ movb $1,BORINGSSL_function_hit+5(%rip)
+#endif
+#endif
+
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
@@ -632,6 +655,7 @@ vpaes_set_encrypt_key:
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3
+.cfi_endproc
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
.globl vpaes_set_decrypt_key
@@ -639,6 +663,7 @@ vpaes_set_encrypt_key:
.type vpaes_set_decrypt_key,@function
.align 16
vpaes_set_decrypt_key:
+.cfi_startproc
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
@@ -654,6 +679,7 @@ vpaes_set_decrypt_key:
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3
+.cfi_endproc
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
.globl vpaes_encrypt
@@ -661,11 +687,20 @@ vpaes_set_decrypt_key:
.type vpaes_encrypt,@function
.align 16
vpaes_encrypt:
+.cfi_startproc
+#ifndef NDEBUG
+#ifndef BORINGSSL_FIPS
+.extern BORINGSSL_function_hit
+.hidden BORINGSSL_function_hit
+ movb $1,BORINGSSL_function_hit+4(%rip)
+#endif
+#endif
movdqu (%rdi),%xmm0
call _vpaes_preheat
call _vpaes_encrypt_core
movdqu %xmm0,(%rsi)
.byte 0xf3,0xc3
+.cfi_endproc
.size vpaes_encrypt,.-vpaes_encrypt
.globl vpaes_decrypt
@@ -673,17 +708,20 @@ vpaes_encrypt:
.type vpaes_decrypt,@function
.align 16
vpaes_decrypt:
+.cfi_startproc
movdqu (%rdi),%xmm0
call _vpaes_preheat
call _vpaes_decrypt_core
movdqu %xmm0,(%rsi)
.byte 0xf3,0xc3
+.cfi_endproc
.size vpaes_decrypt,.-vpaes_decrypt
.globl vpaes_cbc_encrypt
.hidden vpaes_cbc_encrypt
.type vpaes_cbc_encrypt,@function
.align 16
vpaes_cbc_encrypt:
+.cfi_startproc
xchgq %rcx,%rdx
subq $16,%rcx
jc .Lcbc_abort
@@ -719,6 +757,7 @@ vpaes_cbc_encrypt:
movdqu %xmm6,(%r8)
.Lcbc_abort:
.byte 0xf3,0xc3
+.cfi_endproc
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
@@ -729,6 +768,7 @@ vpaes_cbc_encrypt:
.type _vpaes_preheat,@function
.align 16
_vpaes_preheat:
+.cfi_startproc
leaq .Lk_s0F(%rip),%r10
movdqa -32(%r10),%xmm10
movdqa -16(%r10),%xmm11
@@ -738,6 +778,7 @@ _vpaes_preheat:
movdqa 80(%r10),%xmm15
movdqa 96(%r10),%xmm14
.byte 0xf3,0xc3
+.cfi_endproc
.size _vpaes_preheat,.-_vpaes_preheat
diff --git a/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S b/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
index 8ac360dd..b12393e2 100644
--- a/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
+++ b/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
@@ -566,6 +566,7 @@ bn_mul4x_mont_gather5:
.type mul4x_internal,@function
.align 32
mul4x_internal:
+.cfi_startproc
shlq $5,%r9
movd 8(%rax),%xmm5
leaq .Linc(%rip),%rax
@@ -1087,6 +1088,7 @@ mul4x_internal:
movq 16(%rbp),%r14
movq 24(%rbp),%r15
jmp .Lsqr4x_sub_entry
+.cfi_endproc
.size mul4x_internal,.-mul4x_internal
.globl bn_power5
.hidden bn_power5
@@ -1232,6 +1234,7 @@ bn_power5:
.align 32
bn_sqr8x_internal:
__bn_sqr8x_internal:
+.cfi_startproc
@@ -2006,10 +2009,12 @@ __bn_sqr8x_reduction:
cmpq %rdx,%rdi
jb .L8x_reduction_loop
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_sqr8x_internal,.-bn_sqr8x_internal
.type __bn_post4x_internal,@function
.align 32
__bn_post4x_internal:
+.cfi_startproc
movq 0(%rbp),%r12
leaq (%rdi,%r9,1),%rbx
movq %r9,%rcx
@@ -2060,16 +2065,19 @@ __bn_post4x_internal:
movq %r9,%r10
negq %r9
.byte 0xf3,0xc3
+.cfi_endproc
.size __bn_post4x_internal,.-__bn_post4x_internal
.globl bn_from_montgomery
.hidden bn_from_montgomery
.type bn_from_montgomery,@function
.align 32
bn_from_montgomery:
+.cfi_startproc
testl $7,%r9d
jz bn_from_mont8x
xorl %eax,%eax
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_from_montgomery,.-bn_from_montgomery
.type bn_from_mont8x,@function
@@ -2354,6 +2362,7 @@ bn_mulx4x_mont_gather5:
.type mulx4x_internal,@function
.align 32
mulx4x_internal:
+.cfi_startproc
movq %r9,8(%rsp)
movq %r9,%r10
negq %r9
@@ -2772,6 +2781,7 @@ mulx4x_internal:
movq 16(%rbp),%r14
movq 24(%rbp),%r15
jmp .Lsqrx4x_sub_entry
+.cfi_endproc
.size mulx4x_internal,.-mulx4x_internal
.type bn_powerx5,@function
.align 32
@@ -2915,6 +2925,7 @@ bn_powerx5:
.align 32
bn_sqrx8x_internal:
__bn_sqrx8x_internal:
+.cfi_startproc
@@ -3526,9 +3537,12 @@ __bn_sqrx8x_reduction:
cmpq 8+8(%rsp),%r8
jb .Lsqrx8x_reduction_loop
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
.align 32
+.type __bn_postx4x_internal,@function
__bn_postx4x_internal:
+.cfi_startproc
movq 0(%rbp),%r12
movq %rcx,%r10
movq %rcx,%r9
@@ -3576,12 +3590,14 @@ __bn_postx4x_internal:
negq %r9
.byte 0xf3,0xc3
+.cfi_endproc
.size __bn_postx4x_internal,.-__bn_postx4x_internal
.globl bn_scatter5
.hidden bn_scatter5
.type bn_scatter5,@function
.align 16
bn_scatter5:
+.cfi_startproc
cmpl $0,%esi
jz .Lscatter_epilogue
leaq (%rdx,%rcx,8),%rdx
@@ -3594,6 +3610,7 @@ bn_scatter5:
jnz .Lscatter
.Lscatter_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_scatter5,.-bn_scatter5
.globl bn_gather5
@@ -3601,9 +3618,11 @@ bn_scatter5:
.type bn_gather5,@function
.align 32
bn_gather5:
+.cfi_startproc
.LSEH_begin_bn_gather5:
.byte 0x4c,0x8d,0x14,0x24
+.cfi_def_cfa_register %r10
.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00
leaq .Linc(%rip),%rax
andq $-16,%rsp
@@ -3757,8 +3776,10 @@ bn_gather5:
jnz .Lgather
leaq (%r10),%rsp
+.cfi_def_cfa_register %rsp
.byte 0xf3,0xc3
.LSEH_end_bn_gather5:
+.cfi_endproc
.size bn_gather5,.-bn_gather5
.align 64
.Linc:
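x86_64-mont5.S gains the same paired CFI annotations for its internal Montgomery helpers, a previously missing .type for __bn_postx4x_internal, and CFA tracking for bn_gather5's %r10-held frame pointer (restored to %rsp at exit). The scatter/gather pair being annotated implements the cache-timing-hardened window table for Montgomery powering; a hedged sketch of their contracts, assuming 64-bit BN_ULONG limbs as in the surrounding bignum code:

#include <stddef.h>
#include <stdint.h>

typedef uint64_t BN_ULONG;  /* 64-bit limbs on x86_64 */

/* Write inp (num limbs) into slot `power` of the 32-entry window table. */
extern void bn_scatter5(const BN_ULONG *inp, size_t num, void *table,
                        size_t power);
/* Read slot `power` back without a secret-dependent access pattern. */
extern void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);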
diff --git a/linux-x86_64/crypto/test/trampoline-x86_64.S b/linux-x86_64/crypto/test/trampoline-x86_64.S
new file mode 100644
index 00000000..91a13f3e
--- /dev/null
+++ b/linux-x86_64/crypto/test/trampoline-x86_64.S
@@ -0,0 +1,517 @@
+# This file is generated from a similarly-named Perl script in the BoringSSL
+# source tree. Do not edit by hand.
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+#endif
+
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+.text
+
+
+
+
+
+
+
+
+.type abi_test_trampoline, @function
+.globl abi_test_trampoline
+.hidden abi_test_trampoline
+.align 16
+abi_test_trampoline:
+.Labi_test_trampoline_seh_begin:
+.cfi_startproc
+
+
+
+
+
+
+
+
+
+ subq $120,%rsp
+.cfi_adjust_cfa_offset 120
+.Labi_test_trampoline_seh_prolog_alloc:
+ movq %r8,48(%rsp)
+ movq %rbx,64(%rsp)
+.cfi_offset rbx, -64
+.Labi_test_trampoline_seh_prolog_rbx:
+ movq %rbp,72(%rsp)
+.cfi_offset rbp, -56
+.Labi_test_trampoline_seh_prolog_rbp:
+ movq %r12,80(%rsp)
+.cfi_offset r12, -48
+.Labi_test_trampoline_seh_prolog_r12:
+ movq %r13,88(%rsp)
+.cfi_offset r13, -40
+.Labi_test_trampoline_seh_prolog_r13:
+ movq %r14,96(%rsp)
+.cfi_offset r14, -32
+.Labi_test_trampoline_seh_prolog_r14:
+ movq %r15,104(%rsp)
+.cfi_offset r15, -24
+.Labi_test_trampoline_seh_prolog_r15:
+.Labi_test_trampoline_seh_prolog_end:
+ movq 0(%rsi),%rbx
+ movq 8(%rsi),%rbp
+ movq 16(%rsi),%r12
+ movq 24(%rsi),%r13
+ movq 32(%rsi),%r14
+ movq 40(%rsi),%r15
+
+ movq %rdi,32(%rsp)
+ movq %rsi,40(%rsp)
+
+
+
+
+ movq %rdx,%r10
+ movq %rcx,%r11
+ decq %r11
+ js .Largs_done
+ movq (%r10),%rdi
+ addq $8,%r10
+ decq %r11
+ js .Largs_done
+ movq (%r10),%rsi
+ addq $8,%r10
+ decq %r11
+ js .Largs_done
+ movq (%r10),%rdx
+ addq $8,%r10
+ decq %r11
+ js .Largs_done
+ movq (%r10),%rcx
+ addq $8,%r10
+ decq %r11
+ js .Largs_done
+ movq (%r10),%r8
+ addq $8,%r10
+ decq %r11
+ js .Largs_done
+ movq (%r10),%r9
+ addq $8,%r10
+ leaq 0(%rsp),%rax
+.Largs_loop:
+ decq %r11
+ js .Largs_done
+
+
+
+
+
+
+ movq %r11,56(%rsp)
+ movq (%r10),%r11
+ movq %r11,(%rax)
+ movq 56(%rsp),%r11
+
+ addq $8,%r10
+ addq $8,%rax
+ jmp .Largs_loop
+
+.Largs_done:
+ movq 32(%rsp),%rax
+ movq 48(%rsp),%r10
+ testq %r10,%r10
+ jz .Lno_unwind
+
+
+ pushfq
+ orq $0x100,0(%rsp)
+ popfq
+
+
+
+ nop
+.globl abi_test_unwind_start
+.hidden abi_test_unwind_start
+abi_test_unwind_start:
+
+ call *%rax
+.globl abi_test_unwind_return
+.hidden abi_test_unwind_return
+abi_test_unwind_return:
+
+
+
+
+ pushfq
+ andq $-0x101,0(%rsp)
+ popfq
+.globl abi_test_unwind_stop
+.hidden abi_test_unwind_stop
+abi_test_unwind_stop:
+
+ jmp .Lcall_done
+
+.Lno_unwind:
+ call *%rax
+
+.Lcall_done:
+
+ movq 40(%rsp),%rsi
+ movq %rbx,0(%rsi)
+ movq %rbp,8(%rsi)
+ movq %r12,16(%rsi)
+ movq %r13,24(%rsi)
+ movq %r14,32(%rsi)
+ movq %r15,40(%rsi)
+ movq 64(%rsp),%rbx
+.cfi_restore rbx
+ movq 72(%rsp),%rbp
+.cfi_restore rbp
+ movq 80(%rsp),%r12
+.cfi_restore r12
+ movq 88(%rsp),%r13
+.cfi_restore r13
+ movq 96(%rsp),%r14
+.cfi_restore r14
+ movq 104(%rsp),%r15
+.cfi_restore r15
+ addq $120,%rsp
+.cfi_adjust_cfa_offset -120
+
+
+ .byte 0xf3,0xc3
+.cfi_endproc
+.Labi_test_trampoline_seh_end:
+.size abi_test_trampoline,.-abi_test_trampoline
+.type abi_test_clobber_rax, @function
+.globl abi_test_clobber_rax
+.hidden abi_test_clobber_rax
+.align 16
+abi_test_clobber_rax:
+ xorq %rax,%rax
+ .byte 0xf3,0xc3
+.size abi_test_clobber_rax,.-abi_test_clobber_rax
+.type abi_test_clobber_rbx, @function
+.globl abi_test_clobber_rbx
+.hidden abi_test_clobber_rbx
+.align 16
+abi_test_clobber_rbx:
+ xorq %rbx,%rbx
+ .byte 0xf3,0xc3
+.size abi_test_clobber_rbx,.-abi_test_clobber_rbx
+.type abi_test_clobber_rcx, @function
+.globl abi_test_clobber_rcx
+.hidden abi_test_clobber_rcx
+.align 16
+abi_test_clobber_rcx:
+ xorq %rcx,%rcx
+ .byte 0xf3,0xc3
+.size abi_test_clobber_rcx,.-abi_test_clobber_rcx
+.type abi_test_clobber_rdx, @function
+.globl abi_test_clobber_rdx
+.hidden abi_test_clobber_rdx
+.align 16
+abi_test_clobber_rdx:
+ xorq %rdx,%rdx
+ .byte 0xf3,0xc3
+.size abi_test_clobber_rdx,.-abi_test_clobber_rdx
+.type abi_test_clobber_rdi, @function
+.globl abi_test_clobber_rdi
+.hidden abi_test_clobber_rdi
+.align 16
+abi_test_clobber_rdi:
+ xorq %rdi,%rdi
+ .byte 0xf3,0xc3
+.size abi_test_clobber_rdi,.-abi_test_clobber_rdi
+.type abi_test_clobber_rsi, @function
+.globl abi_test_clobber_rsi
+.hidden abi_test_clobber_rsi
+.align 16
+abi_test_clobber_rsi:
+ xorq %rsi,%rsi
+ .byte 0xf3,0xc3
+.size abi_test_clobber_rsi,.-abi_test_clobber_rsi
+.type abi_test_clobber_rbp, @function
+.globl abi_test_clobber_rbp
+.hidden abi_test_clobber_rbp
+.align 16
+abi_test_clobber_rbp:
+ xorq %rbp,%rbp
+ .byte 0xf3,0xc3
+.size abi_test_clobber_rbp,.-abi_test_clobber_rbp
+.type abi_test_clobber_r8, @function
+.globl abi_test_clobber_r8
+.hidden abi_test_clobber_r8
+.align 16
+abi_test_clobber_r8:
+ xorq %r8,%r8
+ .byte 0xf3,0xc3
+.size abi_test_clobber_r8,.-abi_test_clobber_r8
+.type abi_test_clobber_r9, @function
+.globl abi_test_clobber_r9
+.hidden abi_test_clobber_r9
+.align 16
+abi_test_clobber_r9:
+ xorq %r9,%r9
+ .byte 0xf3,0xc3
+.size abi_test_clobber_r9,.-abi_test_clobber_r9
+.type abi_test_clobber_r10, @function
+.globl abi_test_clobber_r10
+.hidden abi_test_clobber_r10
+.align 16
+abi_test_clobber_r10:
+ xorq %r10,%r10
+ .byte 0xf3,0xc3
+.size abi_test_clobber_r10,.-abi_test_clobber_r10
+.type abi_test_clobber_r11, @function
+.globl abi_test_clobber_r11
+.hidden abi_test_clobber_r11
+.align 16
+abi_test_clobber_r11:
+ xorq %r11,%r11
+ .byte 0xf3,0xc3
+.size abi_test_clobber_r11,.-abi_test_clobber_r11
+.type abi_test_clobber_r12, @function
+.globl abi_test_clobber_r12
+.hidden abi_test_clobber_r12
+.align 16
+abi_test_clobber_r12:
+ xorq %r12,%r12
+ .byte 0xf3,0xc3
+.size abi_test_clobber_r12,.-abi_test_clobber_r12
+.type abi_test_clobber_r13, @function
+.globl abi_test_clobber_r13
+.hidden abi_test_clobber_r13
+.align 16
+abi_test_clobber_r13:
+ xorq %r13,%r13
+ .byte 0xf3,0xc3
+.size abi_test_clobber_r13,.-abi_test_clobber_r13
+.type abi_test_clobber_r14, @function
+.globl abi_test_clobber_r14
+.hidden abi_test_clobber_r14
+.align 16
+abi_test_clobber_r14:
+ xorq %r14,%r14
+ .byte 0xf3,0xc3
+.size abi_test_clobber_r14,.-abi_test_clobber_r14
+.type abi_test_clobber_r15, @function
+.globl abi_test_clobber_r15
+.hidden abi_test_clobber_r15
+.align 16
+abi_test_clobber_r15:
+ xorq %r15,%r15
+ .byte 0xf3,0xc3
+.size abi_test_clobber_r15,.-abi_test_clobber_r15
+.type abi_test_clobber_xmm0, @function
+.globl abi_test_clobber_xmm0
+.hidden abi_test_clobber_xmm0
+.align 16
+abi_test_clobber_xmm0:
+ pxor %xmm0,%xmm0
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm0,.-abi_test_clobber_xmm0
+.type abi_test_clobber_xmm1, @function
+.globl abi_test_clobber_xmm1
+.hidden abi_test_clobber_xmm1
+.align 16
+abi_test_clobber_xmm1:
+ pxor %xmm1,%xmm1
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm1,.-abi_test_clobber_xmm1
+.type abi_test_clobber_xmm2, @function
+.globl abi_test_clobber_xmm2
+.hidden abi_test_clobber_xmm2
+.align 16
+abi_test_clobber_xmm2:
+ pxor %xmm2,%xmm2
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm2,.-abi_test_clobber_xmm2
+.type abi_test_clobber_xmm3, @function
+.globl abi_test_clobber_xmm3
+.hidden abi_test_clobber_xmm3
+.align 16
+abi_test_clobber_xmm3:
+ pxor %xmm3,%xmm3
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm3,.-abi_test_clobber_xmm3
+.type abi_test_clobber_xmm4, @function
+.globl abi_test_clobber_xmm4
+.hidden abi_test_clobber_xmm4
+.align 16
+abi_test_clobber_xmm4:
+ pxor %xmm4,%xmm4
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm4,.-abi_test_clobber_xmm4
+.type abi_test_clobber_xmm5, @function
+.globl abi_test_clobber_xmm5
+.hidden abi_test_clobber_xmm5
+.align 16
+abi_test_clobber_xmm5:
+ pxor %xmm5,%xmm5
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm5,.-abi_test_clobber_xmm5
+.type abi_test_clobber_xmm6, @function
+.globl abi_test_clobber_xmm6
+.hidden abi_test_clobber_xmm6
+.align 16
+abi_test_clobber_xmm6:
+ pxor %xmm6,%xmm6
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm6,.-abi_test_clobber_xmm6
+.type abi_test_clobber_xmm7, @function
+.globl abi_test_clobber_xmm7
+.hidden abi_test_clobber_xmm7
+.align 16
+abi_test_clobber_xmm7:
+ pxor %xmm7,%xmm7
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm7,.-abi_test_clobber_xmm7
+.type abi_test_clobber_xmm8, @function
+.globl abi_test_clobber_xmm8
+.hidden abi_test_clobber_xmm8
+.align 16
+abi_test_clobber_xmm8:
+ pxor %xmm8,%xmm8
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm8,.-abi_test_clobber_xmm8
+.type abi_test_clobber_xmm9, @function
+.globl abi_test_clobber_xmm9
+.hidden abi_test_clobber_xmm9
+.align 16
+abi_test_clobber_xmm9:
+ pxor %xmm9,%xmm9
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm9,.-abi_test_clobber_xmm9
+.type abi_test_clobber_xmm10, @function
+.globl abi_test_clobber_xmm10
+.hidden abi_test_clobber_xmm10
+.align 16
+abi_test_clobber_xmm10:
+ pxor %xmm10,%xmm10
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm10,.-abi_test_clobber_xmm10
+.type abi_test_clobber_xmm11, @function
+.globl abi_test_clobber_xmm11
+.hidden abi_test_clobber_xmm11
+.align 16
+abi_test_clobber_xmm11:
+ pxor %xmm11,%xmm11
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm11,.-abi_test_clobber_xmm11
+.type abi_test_clobber_xmm12, @function
+.globl abi_test_clobber_xmm12
+.hidden abi_test_clobber_xmm12
+.align 16
+abi_test_clobber_xmm12:
+ pxor %xmm12,%xmm12
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm12,.-abi_test_clobber_xmm12
+.type abi_test_clobber_xmm13, @function
+.globl abi_test_clobber_xmm13
+.hidden abi_test_clobber_xmm13
+.align 16
+abi_test_clobber_xmm13:
+ pxor %xmm13,%xmm13
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm13,.-abi_test_clobber_xmm13
+.type abi_test_clobber_xmm14, @function
+.globl abi_test_clobber_xmm14
+.hidden abi_test_clobber_xmm14
+.align 16
+abi_test_clobber_xmm14:
+ pxor %xmm14,%xmm14
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm14,.-abi_test_clobber_xmm14
+.type abi_test_clobber_xmm15, @function
+.globl abi_test_clobber_xmm15
+.hidden abi_test_clobber_xmm15
+.align 16
+abi_test_clobber_xmm15:
+ pxor %xmm15,%xmm15
+ .byte 0xf3,0xc3
+.size abi_test_clobber_xmm15,.-abi_test_clobber_xmm15
+
+
+
+.type abi_test_bad_unwind_wrong_register, @function
+.globl abi_test_bad_unwind_wrong_register
+.hidden abi_test_bad_unwind_wrong_register
+.align 16
+abi_test_bad_unwind_wrong_register:
+.cfi_startproc
+.Labi_test_bad_unwind_wrong_register_seh_begin:
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-16
+.Labi_test_bad_unwind_wrong_register_seh_push_r13:
+
+
+
+ nop
+ popq %r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r12
+ .byte 0xf3,0xc3
+.Labi_test_bad_unwind_wrong_register_seh_end:
+.cfi_endproc
+.size abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register
+
+
+
+
+.type abi_test_bad_unwind_temporary, @function
+.globl abi_test_bad_unwind_temporary
+.hidden abi_test_bad_unwind_temporary
+.align 16
+abi_test_bad_unwind_temporary:
+.cfi_startproc
+.Labi_test_bad_unwind_temporary_seh_begin:
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-16
+.Labi_test_bad_unwind_temporary_seh_push_r12:
+
+ movq %r12,%rax
+ incq %rax
+ movq %rax,(%rsp)
+
+
+
+ movq %r12,(%rsp)
+
+
+ popq %r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r12
+ .byte 0xf3,0xc3
+.Labi_test_bad_unwind_temporary_seh_end:
+.cfi_endproc
+.size abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary
+
+
+
+
+.type abi_test_get_and_clear_direction_flag, @function
+.globl abi_test_get_and_clear_direction_flag
+.hidden abi_test_get_and_clear_direction_flag
+abi_test_get_and_clear_direction_flag:
+ pushfq
+ popq %rax
+ andq $0x400,%rax
+ shrq $10,%rax
+ cld
+ .byte 0xf3,0xc3
+.size abi_test_get_and_clear_direction_flag,.-abi_test_get_and_clear_direction_flag
+
+
+
+.type abi_test_set_direction_flag, @function
+.globl abi_test_set_direction_flag
+.hidden abi_test_set_direction_flag
+abi_test_set_direction_flag:
+ std
+ .byte 0xf3,0xc3
+.size abi_test_set_direction_flag,.-abi_test_set_direction_flag
+#endif
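The new trampoline-x86_64.S backs BoringSSL's ABI testing harness: abi_test_trampoline loads the callee-saved registers from a caller-supplied block, marshals up to six integer arguments into the System V registers (%rdi, %rsi, %rdx, %rcx, %r8, %r9) with the rest spilled to the stack, optionally sets the trap flag (the pushfq/orq $0x100 sequence) so an unwind tester can single-step the call, invokes the target via `call *%rax`, and writes the callee-saved registers back so clobbers are detectable; the deliberately broken abi_test_bad_unwind_* functions give that tester known-bad CFI to flag. A rough sketch of the entry contract as the assembly consumes it; the struct name and field order below are inferred from the six 8-byte loads at entry and are illustrative:

#include <stddef.h>
#include <stdint.h>

/* Mirrors the six 8-byte slots loaded from (%rsi) at entry and stored back
 * at exit: rbx, rbp, r12, r13, r14, r15, in that order. */
typedef struct {
  uint64_t rbx, rbp, r12, r13, r14, r15;
} CallerState;

/* func   -> %rdi: target, called via `call *%rax`
 * state  -> %rsi: callee-saved register block (read, then rewritten)
 * argv   -> %rdx: integer arguments forwarded to func
 * argc   -> %rcx: number of arguments
 * unwind -> %r8:  if nonzero, run the call with the trap flag set
 * Returns the target's %rax. */
extern uint64_t abi_test_trampoline(uint64_t func, CallerState *state,
                                    const uint64_t *argv, size_t argc,
                                    uint64_t unwind);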