summaryrefslogtreecommitdiff
path: root/win-x86_64
diff options
context:
space:
mode:
authorDaulet Zhanguzin <dauletz@google.com>2020-08-12 12:46:27 +0100
committerDaulet Zhanguzin <dauletz@google.com>2020-08-24 12:46:36 +0100
commitc960c43412e0632abb712fc465e70b6dfa2e9657 (patch)
treec48455b0f878c45496c3390cf2e88b413dc85a34 /win-x86_64
parentba9db8781ec11859132f543aebff5d6093214751 (diff)
downloadboringssl-c960c43412e0632abb712fc465e70b6dfa2e9657.tar.gz
external/boringssl: Sync to a0b49d63fdc33e54eac93674c86891d15d181d87.
This includes the following changes: https://boringssl.googlesource.com/boringssl/+log/2fb729d4f36beaf263ad85e24a790b571652679c..a0b49d63fdc33e54eac93674c86891d15d181d87 Test: atest CtsLibcoreTestCases Change-Id: Ida4794d56d237422351b9ddcc7d0bc9295e816e1
Diffstat (limited to 'win-x86_64')
-rw-r--r--win-x86_64/crypto/fipsmodule/ghash-x86_64.asm2
-rw-r--r--win-x86_64/crypto/fipsmodule/sha1-x86_64.asm1960
2 files changed, 1960 insertions, 2 deletions
diff --git a/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm b/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
index c8742a4f..194ea8df 100644
--- a/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
+++ b/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
@@ -1177,8 +1177,6 @@ $L$0x1c2_polynomial:
DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
$L$7_mask:
DD 7,0,7,0
-$L$7_mask_poly:
- DD 7,0,450,0
ALIGN 64
DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
diff --git a/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm b/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
index 62dcc62c..1654df1d 100644
--- a/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
+++ b/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
@@ -33,6 +33,11 @@ $L$SEH_begin_sha1_block_data_order:
mov r10d,DWORD[8+r10]
test r8d,512
jz NEAR $L$ialu
+ test r10d,536870912
+ jnz NEAR _shaext_shortcut
+ and r10d,296
+ cmp r10d,296
+ je NEAR _avx2_shortcut
and r8d,268435456
and r9d,1073741824
or r8d,r9d
@@ -1275,6 +1280,198 @@ $L$epilogue:
$L$SEH_end_sha1_block_data_order:
+ALIGN 32
+sha1_block_data_order_shaext:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha1_block_data_order_shaext:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+_shaext_shortcut:
+
+ lea rsp,[((-72))+rsp]
+ movaps XMMWORD[(-8-64)+rax],xmm6
+ movaps XMMWORD[(-8-48)+rax],xmm7
+ movaps XMMWORD[(-8-32)+rax],xmm8
+ movaps XMMWORD[(-8-16)+rax],xmm9
+$L$prologue_shaext:
+ movdqu xmm0,XMMWORD[rdi]
+ movd xmm1,DWORD[16+rdi]
+ movdqa xmm3,XMMWORD[((K_XX_XX+160))]
+
+ movdqu xmm4,XMMWORD[rsi]
+ pshufd xmm0,xmm0,27
+ movdqu xmm5,XMMWORD[16+rsi]
+ pshufd xmm1,xmm1,27
+ movdqu xmm6,XMMWORD[32+rsi]
+DB 102,15,56,0,227
+ movdqu xmm7,XMMWORD[48+rsi]
+DB 102,15,56,0,235
+DB 102,15,56,0,243
+ movdqa xmm9,xmm1
+DB 102,15,56,0,251
+ jmp NEAR $L$oop_shaext
+
+ALIGN 16
+$L$oop_shaext:
+ dec rdx
+ lea r8,[64+rsi]
+ paddd xmm1,xmm4
+ cmovne rsi,r8
+ movdqa xmm8,xmm0
+DB 15,56,201,229
+ movdqa xmm2,xmm0
+DB 15,58,204,193,0
+DB 15,56,200,213
+ pxor xmm4,xmm6
+DB 15,56,201,238
+DB 15,56,202,231
+
+ movdqa xmm1,xmm0
+DB 15,58,204,194,0
+DB 15,56,200,206
+ pxor xmm5,xmm7
+DB 15,56,202,236
+DB 15,56,201,247
+ movdqa xmm2,xmm0
+DB 15,58,204,193,0
+DB 15,56,200,215
+ pxor xmm6,xmm4
+DB 15,56,201,252
+DB 15,56,202,245
+
+ movdqa xmm1,xmm0
+DB 15,58,204,194,0
+DB 15,56,200,204
+ pxor xmm7,xmm5
+DB 15,56,202,254
+DB 15,56,201,229
+ movdqa xmm2,xmm0
+DB 15,58,204,193,0
+DB 15,56,200,213
+ pxor xmm4,xmm6
+DB 15,56,201,238
+DB 15,56,202,231
+
+ movdqa xmm1,xmm0
+DB 15,58,204,194,1
+DB 15,56,200,206
+ pxor xmm5,xmm7
+DB 15,56,202,236
+DB 15,56,201,247
+ movdqa xmm2,xmm0
+DB 15,58,204,193,1
+DB 15,56,200,215
+ pxor xmm6,xmm4
+DB 15,56,201,252
+DB 15,56,202,245
+
+ movdqa xmm1,xmm0
+DB 15,58,204,194,1
+DB 15,56,200,204
+ pxor xmm7,xmm5
+DB 15,56,202,254
+DB 15,56,201,229
+ movdqa xmm2,xmm0
+DB 15,58,204,193,1
+DB 15,56,200,213
+ pxor xmm4,xmm6
+DB 15,56,201,238
+DB 15,56,202,231
+
+ movdqa xmm1,xmm0
+DB 15,58,204,194,1
+DB 15,56,200,206
+ pxor xmm5,xmm7
+DB 15,56,202,236
+DB 15,56,201,247
+ movdqa xmm2,xmm0
+DB 15,58,204,193,2
+DB 15,56,200,215
+ pxor xmm6,xmm4
+DB 15,56,201,252
+DB 15,56,202,245
+
+ movdqa xmm1,xmm0
+DB 15,58,204,194,2
+DB 15,56,200,204
+ pxor xmm7,xmm5
+DB 15,56,202,254
+DB 15,56,201,229
+ movdqa xmm2,xmm0
+DB 15,58,204,193,2
+DB 15,56,200,213
+ pxor xmm4,xmm6
+DB 15,56,201,238
+DB 15,56,202,231
+
+ movdqa xmm1,xmm0
+DB 15,58,204,194,2
+DB 15,56,200,206
+ pxor xmm5,xmm7
+DB 15,56,202,236
+DB 15,56,201,247
+ movdqa xmm2,xmm0
+DB 15,58,204,193,2
+DB 15,56,200,215
+ pxor xmm6,xmm4
+DB 15,56,201,252
+DB 15,56,202,245
+
+ movdqa xmm1,xmm0
+DB 15,58,204,194,3
+DB 15,56,200,204
+ pxor xmm7,xmm5
+DB 15,56,202,254
+ movdqu xmm4,XMMWORD[rsi]
+ movdqa xmm2,xmm0
+DB 15,58,204,193,3
+DB 15,56,200,213
+ movdqu xmm5,XMMWORD[16+rsi]
+DB 102,15,56,0,227
+
+ movdqa xmm1,xmm0
+DB 15,58,204,194,3
+DB 15,56,200,206
+ movdqu xmm6,XMMWORD[32+rsi]
+DB 102,15,56,0,235
+
+ movdqa xmm2,xmm0
+DB 15,58,204,193,3
+DB 15,56,200,215
+ movdqu xmm7,XMMWORD[48+rsi]
+DB 102,15,56,0,243
+
+ movdqa xmm1,xmm0
+DB 15,58,204,194,3
+DB 65,15,56,200,201
+DB 102,15,56,0,251
+
+ paddd xmm0,xmm8
+ movdqa xmm9,xmm1
+
+ jnz NEAR $L$oop_shaext
+
+ pshufd xmm0,xmm0,27
+ pshufd xmm1,xmm1,27
+ movdqu XMMWORD[rdi],xmm0
+ movd DWORD[16+rdi],xmm1
+ movaps xmm6,XMMWORD[((-8-64))+rax]
+ movaps xmm7,XMMWORD[((-8-48))+rax]
+ movaps xmm8,XMMWORD[((-8-32))+rax]
+ movaps xmm9,XMMWORD[((-8-16))+rax]
+ mov rsp,rax
+$L$epilogue_shaext:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_sha1_block_data_order_shaext:
+
ALIGN 16
sha1_block_data_order_ssse3:
mov QWORD[8+rsp],rdi ;WIN64 prologue
@@ -3638,6 +3835,1724 @@ $L$epilogue_avx:
DB 0F3h,0C3h ;repret
$L$SEH_end_sha1_block_data_order_avx:
+
+ALIGN 16
+sha1_block_data_order_avx2:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha1_block_data_order_avx2:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+_avx2_shortcut:
+
+ mov r11,rsp
+
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ vzeroupper
+ lea rsp,[((-96))+rsp]
+ vmovaps XMMWORD[(-40-96)+r11],xmm6
+ vmovaps XMMWORD[(-40-80)+r11],xmm7
+ vmovaps XMMWORD[(-40-64)+r11],xmm8
+ vmovaps XMMWORD[(-40-48)+r11],xmm9
+ vmovaps XMMWORD[(-40-32)+r11],xmm10
+ vmovaps XMMWORD[(-40-16)+r11],xmm11
+$L$prologue_avx2:
+ mov r8,rdi
+ mov r9,rsi
+ mov r10,rdx
+
+ lea rsp,[((-640))+rsp]
+ shl r10,6
+ lea r13,[64+r9]
+ and rsp,-128
+ add r10,r9
+ lea r14,[((K_XX_XX+64))]
+
+ mov eax,DWORD[r8]
+ cmp r13,r10
+ cmovae r13,r9
+ mov ebp,DWORD[4+r8]
+ mov ecx,DWORD[8+r8]
+ mov edx,DWORD[12+r8]
+ mov esi,DWORD[16+r8]
+ vmovdqu ymm6,YMMWORD[64+r14]
+
+ vmovdqu xmm0,XMMWORD[r9]
+ vmovdqu xmm1,XMMWORD[16+r9]
+ vmovdqu xmm2,XMMWORD[32+r9]
+ vmovdqu xmm3,XMMWORD[48+r9]
+ lea r9,[64+r9]
+ vinserti128 ymm0,ymm0,XMMWORD[r13],1
+ vinserti128 ymm1,ymm1,XMMWORD[16+r13],1
+ vpshufb ymm0,ymm0,ymm6
+ vinserti128 ymm2,ymm2,XMMWORD[32+r13],1
+ vpshufb ymm1,ymm1,ymm6
+ vinserti128 ymm3,ymm3,XMMWORD[48+r13],1
+ vpshufb ymm2,ymm2,ymm6
+ vmovdqu ymm11,YMMWORD[((-64))+r14]
+ vpshufb ymm3,ymm3,ymm6
+
+ vpaddd ymm4,ymm0,ymm11
+ vpaddd ymm5,ymm1,ymm11
+ vmovdqu YMMWORD[rsp],ymm4
+ vpaddd ymm6,ymm2,ymm11
+ vmovdqu YMMWORD[32+rsp],ymm5
+ vpaddd ymm7,ymm3,ymm11
+ vmovdqu YMMWORD[64+rsp],ymm6
+ vmovdqu YMMWORD[96+rsp],ymm7
+ vpalignr ymm4,ymm1,ymm0,8
+ vpsrldq ymm8,ymm3,4
+ vpxor ymm4,ymm4,ymm0
+ vpxor ymm8,ymm8,ymm2
+ vpxor ymm4,ymm4,ymm8
+ vpsrld ymm8,ymm4,31
+ vpslldq ymm10,ymm4,12
+ vpaddd ymm4,ymm4,ymm4
+ vpsrld ymm9,ymm10,30
+ vpor ymm4,ymm4,ymm8
+ vpslld ymm10,ymm10,2
+ vpxor ymm4,ymm4,ymm9
+ vpxor ymm4,ymm4,ymm10
+ vpaddd ymm9,ymm4,ymm11
+ vmovdqu YMMWORD[128+rsp],ymm9
+ vpalignr ymm5,ymm2,ymm1,8
+ vpsrldq ymm8,ymm4,4
+ vpxor ymm5,ymm5,ymm1
+ vpxor ymm8,ymm8,ymm3
+ vpxor ymm5,ymm5,ymm8
+ vpsrld ymm8,ymm5,31
+ vmovdqu ymm11,YMMWORD[((-32))+r14]
+ vpslldq ymm10,ymm5,12
+ vpaddd ymm5,ymm5,ymm5
+ vpsrld ymm9,ymm10,30
+ vpor ymm5,ymm5,ymm8
+ vpslld ymm10,ymm10,2
+ vpxor ymm5,ymm5,ymm9
+ vpxor ymm5,ymm5,ymm10
+ vpaddd ymm9,ymm5,ymm11
+ vmovdqu YMMWORD[160+rsp],ymm9
+ vpalignr ymm6,ymm3,ymm2,8
+ vpsrldq ymm8,ymm5,4
+ vpxor ymm6,ymm6,ymm2
+ vpxor ymm8,ymm8,ymm4
+ vpxor ymm6,ymm6,ymm8
+ vpsrld ymm8,ymm6,31
+ vpslldq ymm10,ymm6,12
+ vpaddd ymm6,ymm6,ymm6
+ vpsrld ymm9,ymm10,30
+ vpor ymm6,ymm6,ymm8
+ vpslld ymm10,ymm10,2
+ vpxor ymm6,ymm6,ymm9
+ vpxor ymm6,ymm6,ymm10
+ vpaddd ymm9,ymm6,ymm11
+ vmovdqu YMMWORD[192+rsp],ymm9
+ vpalignr ymm7,ymm4,ymm3,8
+ vpsrldq ymm8,ymm6,4
+ vpxor ymm7,ymm7,ymm3
+ vpxor ymm8,ymm8,ymm5
+ vpxor ymm7,ymm7,ymm8
+ vpsrld ymm8,ymm7,31
+ vpslldq ymm10,ymm7,12
+ vpaddd ymm7,ymm7,ymm7
+ vpsrld ymm9,ymm10,30
+ vpor ymm7,ymm7,ymm8
+ vpslld ymm10,ymm10,2
+ vpxor ymm7,ymm7,ymm9
+ vpxor ymm7,ymm7,ymm10
+ vpaddd ymm9,ymm7,ymm11
+ vmovdqu YMMWORD[224+rsp],ymm9
+ lea r13,[128+rsp]
+ jmp NEAR $L$oop_avx2
+ALIGN 32
+$L$oop_avx2:
+ rorx ebx,ebp,2
+ andn edi,ebp,edx
+ and ebp,ecx
+ xor ebp,edi
+ jmp NEAR $L$align32_1
+ALIGN 32
+$L$align32_1:
+ vpalignr ymm8,ymm7,ymm6,8
+ vpxor ymm0,ymm0,ymm4
+ add esi,DWORD[((-128))+r13]
+ andn edi,eax,ecx
+ vpxor ymm0,ymm0,ymm1
+ add esi,ebp
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ vpxor ymm0,ymm0,ymm8
+ and eax,ebx
+ add esi,r12d
+ xor eax,edi
+ vpsrld ymm8,ymm0,30
+ vpslld ymm0,ymm0,2
+ add edx,DWORD[((-124))+r13]
+ andn edi,esi,ebx
+ add edx,eax
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ and esi,ebp
+ vpor ymm0,ymm0,ymm8
+ add edx,r12d
+ xor esi,edi
+ add ecx,DWORD[((-120))+r13]
+ andn edi,edx,ebp
+ vpaddd ymm9,ymm0,ymm11
+ add ecx,esi
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ and edx,eax
+ vmovdqu YMMWORD[256+rsp],ymm9
+ add ecx,r12d
+ xor edx,edi
+ add ebx,DWORD[((-116))+r13]
+ andn edi,ecx,eax
+ add ebx,edx
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ and ecx,esi
+ add ebx,r12d
+ xor ecx,edi
+ add ebp,DWORD[((-96))+r13]
+ andn edi,ebx,esi
+ add ebp,ecx
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ and ebx,edx
+ add ebp,r12d
+ xor ebx,edi
+ vpalignr ymm8,ymm0,ymm7,8
+ vpxor ymm1,ymm1,ymm5
+ add eax,DWORD[((-92))+r13]
+ andn edi,ebp,edx
+ vpxor ymm1,ymm1,ymm2
+ add eax,ebx
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ vpxor ymm1,ymm1,ymm8
+ and ebp,ecx
+ add eax,r12d
+ xor ebp,edi
+ vpsrld ymm8,ymm1,30
+ vpslld ymm1,ymm1,2
+ add esi,DWORD[((-88))+r13]
+ andn edi,eax,ecx
+ add esi,ebp
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ and eax,ebx
+ vpor ymm1,ymm1,ymm8
+ add esi,r12d
+ xor eax,edi
+ add edx,DWORD[((-84))+r13]
+ andn edi,esi,ebx
+ vpaddd ymm9,ymm1,ymm11
+ add edx,eax
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ and esi,ebp
+ vmovdqu YMMWORD[288+rsp],ymm9
+ add edx,r12d
+ xor esi,edi
+ add ecx,DWORD[((-64))+r13]
+ andn edi,edx,ebp
+ add ecx,esi
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ and edx,eax
+ add ecx,r12d
+ xor edx,edi
+ add ebx,DWORD[((-60))+r13]
+ andn edi,ecx,eax
+ add ebx,edx
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ and ecx,esi
+ add ebx,r12d
+ xor ecx,edi
+ vpalignr ymm8,ymm1,ymm0,8
+ vpxor ymm2,ymm2,ymm6
+ add ebp,DWORD[((-56))+r13]
+ andn edi,ebx,esi
+ vpxor ymm2,ymm2,ymm3
+ vmovdqu ymm11,YMMWORD[r14]
+ add ebp,ecx
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ vpxor ymm2,ymm2,ymm8
+ and ebx,edx
+ add ebp,r12d
+ xor ebx,edi
+ vpsrld ymm8,ymm2,30
+ vpslld ymm2,ymm2,2
+ add eax,DWORD[((-52))+r13]
+ andn edi,ebp,edx
+ add eax,ebx
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ and ebp,ecx
+ vpor ymm2,ymm2,ymm8
+ add eax,r12d
+ xor ebp,edi
+ add esi,DWORD[((-32))+r13]
+ andn edi,eax,ecx
+ vpaddd ymm9,ymm2,ymm11
+ add esi,ebp
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ and eax,ebx
+ vmovdqu YMMWORD[320+rsp],ymm9
+ add esi,r12d
+ xor eax,edi
+ add edx,DWORD[((-28))+r13]
+ andn edi,esi,ebx
+ add edx,eax
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ and esi,ebp
+ add edx,r12d
+ xor esi,edi
+ add ecx,DWORD[((-24))+r13]
+ andn edi,edx,ebp
+ add ecx,esi
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ and edx,eax
+ add ecx,r12d
+ xor edx,edi
+ vpalignr ymm8,ymm2,ymm1,8
+ vpxor ymm3,ymm3,ymm7
+ add ebx,DWORD[((-20))+r13]
+ andn edi,ecx,eax
+ vpxor ymm3,ymm3,ymm4
+ add ebx,edx
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ vpxor ymm3,ymm3,ymm8
+ and ecx,esi
+ add ebx,r12d
+ xor ecx,edi
+ vpsrld ymm8,ymm3,30
+ vpslld ymm3,ymm3,2
+ add ebp,DWORD[r13]
+ andn edi,ebx,esi
+ add ebp,ecx
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ and ebx,edx
+ vpor ymm3,ymm3,ymm8
+ add ebp,r12d
+ xor ebx,edi
+ add eax,DWORD[4+r13]
+ andn edi,ebp,edx
+ vpaddd ymm9,ymm3,ymm11
+ add eax,ebx
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ and ebp,ecx
+ vmovdqu YMMWORD[352+rsp],ymm9
+ add eax,r12d
+ xor ebp,edi
+ add esi,DWORD[8+r13]
+ andn edi,eax,ecx
+ add esi,ebp
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ and eax,ebx
+ add esi,r12d
+ xor eax,edi
+ add edx,DWORD[12+r13]
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ xor esi,ebx
+ vpalignr ymm8,ymm3,ymm2,8
+ vpxor ymm4,ymm4,ymm0
+ add ecx,DWORD[32+r13]
+ lea ecx,[rsi*1+rcx]
+ vpxor ymm4,ymm4,ymm5
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ vpxor ymm4,ymm4,ymm8
+ add ecx,r12d
+ xor edx,ebp
+ add ebx,DWORD[36+r13]
+ vpsrld ymm8,ymm4,30
+ vpslld ymm4,ymm4,2
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ vpor ymm4,ymm4,ymm8
+ add ebp,DWORD[40+r13]
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ vpaddd ymm9,ymm4,ymm11
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ add eax,DWORD[44+r13]
+ vmovdqu YMMWORD[384+rsp],ymm9
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ add esi,DWORD[64+r13]
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ xor eax,ecx
+ vpalignr ymm8,ymm4,ymm3,8
+ vpxor ymm5,ymm5,ymm1
+ add edx,DWORD[68+r13]
+ lea edx,[rax*1+rdx]
+ vpxor ymm5,ymm5,ymm6
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ vpxor ymm5,ymm5,ymm8
+ add edx,r12d
+ xor esi,ebx
+ add ecx,DWORD[72+r13]
+ vpsrld ymm8,ymm5,30
+ vpslld ymm5,ymm5,2
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ vpor ymm5,ymm5,ymm8
+ add ebx,DWORD[76+r13]
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ vpaddd ymm9,ymm5,ymm11
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ add ebp,DWORD[96+r13]
+ vmovdqu YMMWORD[416+rsp],ymm9
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ add eax,DWORD[100+r13]
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ vpalignr ymm8,ymm5,ymm4,8
+ vpxor ymm6,ymm6,ymm2
+ add esi,DWORD[104+r13]
+ lea esi,[rbp*1+rsi]
+ vpxor ymm6,ymm6,ymm7
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ vpxor ymm6,ymm6,ymm8
+ add esi,r12d
+ xor eax,ecx
+ add edx,DWORD[108+r13]
+ lea r13,[256+r13]
+ vpsrld ymm8,ymm6,30
+ vpslld ymm6,ymm6,2
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ xor esi,ebx
+ vpor ymm6,ymm6,ymm8
+ add ecx,DWORD[((-128))+r13]
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ vpaddd ymm9,ymm6,ymm11
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ add ebx,DWORD[((-124))+r13]
+ vmovdqu YMMWORD[448+rsp],ymm9
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ add ebp,DWORD[((-120))+r13]
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ vpalignr ymm8,ymm6,ymm5,8
+ vpxor ymm7,ymm7,ymm3
+ add eax,DWORD[((-116))+r13]
+ lea eax,[rbx*1+rax]
+ vpxor ymm7,ymm7,ymm0
+ vmovdqu ymm11,YMMWORD[32+r14]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ vpxor ymm7,ymm7,ymm8
+ add eax,r12d
+ xor ebp,edx
+ add esi,DWORD[((-96))+r13]
+ vpsrld ymm8,ymm7,30
+ vpslld ymm7,ymm7,2
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ xor eax,ecx
+ vpor ymm7,ymm7,ymm8
+ add edx,DWORD[((-92))+r13]
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ vpaddd ymm9,ymm7,ymm11
+ xor esi,ebp
+ add edx,r12d
+ xor esi,ebx
+ add ecx,DWORD[((-88))+r13]
+ vmovdqu YMMWORD[480+rsp],ymm9
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ add ebx,DWORD[((-84))+r13]
+ mov edi,esi
+ xor edi,eax
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ and ecx,edi
+ jmp NEAR $L$align32_2
+ALIGN 32
+$L$align32_2:
+ vpalignr ymm8,ymm7,ymm6,8
+ vpxor ymm0,ymm0,ymm4
+ add ebp,DWORD[((-64))+r13]
+ xor ecx,esi
+ vpxor ymm0,ymm0,ymm1
+ mov edi,edx
+ xor edi,esi
+ lea ebp,[rbp*1+rcx]
+ vpxor ymm0,ymm0,ymm8
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ vpsrld ymm8,ymm0,30
+ vpslld ymm0,ymm0,2
+ add ebp,r12d
+ and ebx,edi
+ add eax,DWORD[((-60))+r13]
+ xor ebx,edx
+ mov edi,ecx
+ xor edi,edx
+ vpor ymm0,ymm0,ymm8
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ vpaddd ymm9,ymm0,ymm11
+ add eax,r12d
+ and ebp,edi
+ add esi,DWORD[((-56))+r13]
+ xor ebp,ecx
+ vmovdqu YMMWORD[512+rsp],ymm9
+ mov edi,ebx
+ xor edi,ecx
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ and eax,edi
+ add edx,DWORD[((-52))+r13]
+ xor eax,ebx
+ mov edi,ebp
+ xor edi,ebx
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ and esi,edi
+ add ecx,DWORD[((-32))+r13]
+ xor esi,ebp
+ mov edi,eax
+ xor edi,ebp
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ and edx,edi
+ vpalignr ymm8,ymm0,ymm7,8
+ vpxor ymm1,ymm1,ymm5
+ add ebx,DWORD[((-28))+r13]
+ xor edx,eax
+ vpxor ymm1,ymm1,ymm2
+ mov edi,esi
+ xor edi,eax
+ lea ebx,[rdx*1+rbx]
+ vpxor ymm1,ymm1,ymm8
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ vpsrld ymm8,ymm1,30
+ vpslld ymm1,ymm1,2
+ add ebx,r12d
+ and ecx,edi
+ add ebp,DWORD[((-24))+r13]
+ xor ecx,esi
+ mov edi,edx
+ xor edi,esi
+ vpor ymm1,ymm1,ymm8
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ vpaddd ymm9,ymm1,ymm11
+ add ebp,r12d
+ and ebx,edi
+ add eax,DWORD[((-20))+r13]
+ xor ebx,edx
+ vmovdqu YMMWORD[544+rsp],ymm9
+ mov edi,ecx
+ xor edi,edx
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ and ebp,edi
+ add esi,DWORD[r13]
+ xor ebp,ecx
+ mov edi,ebx
+ xor edi,ecx
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ and eax,edi
+ add edx,DWORD[4+r13]
+ xor eax,ebx
+ mov edi,ebp
+ xor edi,ebx
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ and esi,edi
+ vpalignr ymm8,ymm1,ymm0,8
+ vpxor ymm2,ymm2,ymm6
+ add ecx,DWORD[8+r13]
+ xor esi,ebp
+ vpxor ymm2,ymm2,ymm3
+ mov edi,eax
+ xor edi,ebp
+ lea ecx,[rsi*1+rcx]
+ vpxor ymm2,ymm2,ymm8
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ vpsrld ymm8,ymm2,30
+ vpslld ymm2,ymm2,2
+ add ecx,r12d
+ and edx,edi
+ add ebx,DWORD[12+r13]
+ xor edx,eax
+ mov edi,esi
+ xor edi,eax
+ vpor ymm2,ymm2,ymm8
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ vpaddd ymm9,ymm2,ymm11
+ add ebx,r12d
+ and ecx,edi
+ add ebp,DWORD[32+r13]
+ xor ecx,esi
+ vmovdqu YMMWORD[576+rsp],ymm9
+ mov edi,edx
+ xor edi,esi
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ and ebx,edi
+ add eax,DWORD[36+r13]
+ xor ebx,edx
+ mov edi,ecx
+ xor edi,edx
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ and ebp,edi
+ add esi,DWORD[40+r13]
+ xor ebp,ecx
+ mov edi,ebx
+ xor edi,ecx
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ and eax,edi
+ vpalignr ymm8,ymm2,ymm1,8
+ vpxor ymm3,ymm3,ymm7
+ add edx,DWORD[44+r13]
+ xor eax,ebx
+ vpxor ymm3,ymm3,ymm4
+ mov edi,ebp
+ xor edi,ebx
+ lea edx,[rax*1+rdx]
+ vpxor ymm3,ymm3,ymm8
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ vpsrld ymm8,ymm3,30
+ vpslld ymm3,ymm3,2
+ add edx,r12d
+ and esi,edi
+ add ecx,DWORD[64+r13]
+ xor esi,ebp
+ mov edi,eax
+ xor edi,ebp
+ vpor ymm3,ymm3,ymm8
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ vpaddd ymm9,ymm3,ymm11
+ add ecx,r12d
+ and edx,edi
+ add ebx,DWORD[68+r13]
+ xor edx,eax
+ vmovdqu YMMWORD[608+rsp],ymm9
+ mov edi,esi
+ xor edi,eax
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ and ecx,edi
+ add ebp,DWORD[72+r13]
+ xor ecx,esi
+ mov edi,edx
+ xor edi,esi
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ and ebx,edi
+ add eax,DWORD[76+r13]
+ xor ebx,edx
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ add esi,DWORD[96+r13]
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ xor eax,ecx
+ add edx,DWORD[100+r13]
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ xor esi,ebx
+ add ecx,DWORD[104+r13]
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ add ebx,DWORD[108+r13]
+ lea r13,[256+r13]
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ add ebp,DWORD[((-128))+r13]
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ add eax,DWORD[((-124))+r13]
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ add esi,DWORD[((-120))+r13]
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ xor eax,ecx
+ add edx,DWORD[((-116))+r13]
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ xor esi,ebx
+ add ecx,DWORD[((-96))+r13]
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ add ebx,DWORD[((-92))+r13]
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ add ebp,DWORD[((-88))+r13]
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ add eax,DWORD[((-84))+r13]
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ add esi,DWORD[((-64))+r13]
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ xor eax,ecx
+ add edx,DWORD[((-60))+r13]
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ xor esi,ebx
+ add ecx,DWORD[((-56))+r13]
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ add ebx,DWORD[((-52))+r13]
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ add ebp,DWORD[((-32))+r13]
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ add eax,DWORD[((-28))+r13]
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ add esi,DWORD[((-24))+r13]
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ xor eax,ecx
+ add edx,DWORD[((-20))+r13]
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ add edx,r12d
+ lea r13,[128+r9]
+ lea rdi,[128+r9]
+ cmp r13,r10
+ cmovae r13,r9
+
+
+ add edx,DWORD[r8]
+ add esi,DWORD[4+r8]
+ add ebp,DWORD[8+r8]
+ mov DWORD[r8],edx
+ add ebx,DWORD[12+r8]
+ mov DWORD[4+r8],esi
+ mov eax,edx
+ add ecx,DWORD[16+r8]
+ mov r12d,ebp
+ mov DWORD[8+r8],ebp
+ mov edx,ebx
+
+ mov DWORD[12+r8],ebx
+ mov ebp,esi
+ mov DWORD[16+r8],ecx
+
+ mov esi,ecx
+ mov ecx,r12d
+
+
+ cmp r9,r10
+ je NEAR $L$done_avx2
+ vmovdqu ymm6,YMMWORD[64+r14]
+ cmp rdi,r10
+ ja NEAR $L$ast_avx2
+
+ vmovdqu xmm0,XMMWORD[((-64))+rdi]
+ vmovdqu xmm1,XMMWORD[((-48))+rdi]
+ vmovdqu xmm2,XMMWORD[((-32))+rdi]
+ vmovdqu xmm3,XMMWORD[((-16))+rdi]
+ vinserti128 ymm0,ymm0,XMMWORD[r13],1
+ vinserti128 ymm1,ymm1,XMMWORD[16+r13],1
+ vinserti128 ymm2,ymm2,XMMWORD[32+r13],1
+ vinserti128 ymm3,ymm3,XMMWORD[48+r13],1
+ jmp NEAR $L$ast_avx2
+
+ALIGN 32
+$L$ast_avx2:
+ lea r13,[((128+16))+rsp]
+ rorx ebx,ebp,2
+ andn edi,ebp,edx
+ and ebp,ecx
+ xor ebp,edi
+ sub r9,-128
+ add esi,DWORD[((-128))+r13]
+ andn edi,eax,ecx
+ add esi,ebp
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ and eax,ebx
+ add esi,r12d
+ xor eax,edi
+ add edx,DWORD[((-124))+r13]
+ andn edi,esi,ebx
+ add edx,eax
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ and esi,ebp
+ add edx,r12d
+ xor esi,edi
+ add ecx,DWORD[((-120))+r13]
+ andn edi,edx,ebp
+ add ecx,esi
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ and edx,eax
+ add ecx,r12d
+ xor edx,edi
+ add ebx,DWORD[((-116))+r13]
+ andn edi,ecx,eax
+ add ebx,edx
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ and ecx,esi
+ add ebx,r12d
+ xor ecx,edi
+ add ebp,DWORD[((-96))+r13]
+ andn edi,ebx,esi
+ add ebp,ecx
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ and ebx,edx
+ add ebp,r12d
+ xor ebx,edi
+ add eax,DWORD[((-92))+r13]
+ andn edi,ebp,edx
+ add eax,ebx
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ and ebp,ecx
+ add eax,r12d
+ xor ebp,edi
+ add esi,DWORD[((-88))+r13]
+ andn edi,eax,ecx
+ add esi,ebp
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ and eax,ebx
+ add esi,r12d
+ xor eax,edi
+ add edx,DWORD[((-84))+r13]
+ andn edi,esi,ebx
+ add edx,eax
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ and esi,ebp
+ add edx,r12d
+ xor esi,edi
+ add ecx,DWORD[((-64))+r13]
+ andn edi,edx,ebp
+ add ecx,esi
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ and edx,eax
+ add ecx,r12d
+ xor edx,edi
+ add ebx,DWORD[((-60))+r13]
+ andn edi,ecx,eax
+ add ebx,edx
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ and ecx,esi
+ add ebx,r12d
+ xor ecx,edi
+ add ebp,DWORD[((-56))+r13]
+ andn edi,ebx,esi
+ add ebp,ecx
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ and ebx,edx
+ add ebp,r12d
+ xor ebx,edi
+ add eax,DWORD[((-52))+r13]
+ andn edi,ebp,edx
+ add eax,ebx
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ and ebp,ecx
+ add eax,r12d
+ xor ebp,edi
+ add esi,DWORD[((-32))+r13]
+ andn edi,eax,ecx
+ add esi,ebp
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ and eax,ebx
+ add esi,r12d
+ xor eax,edi
+ add edx,DWORD[((-28))+r13]
+ andn edi,esi,ebx
+ add edx,eax
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ and esi,ebp
+ add edx,r12d
+ xor esi,edi
+ add ecx,DWORD[((-24))+r13]
+ andn edi,edx,ebp
+ add ecx,esi
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ and edx,eax
+ add ecx,r12d
+ xor edx,edi
+ add ebx,DWORD[((-20))+r13]
+ andn edi,ecx,eax
+ add ebx,edx
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ and ecx,esi
+ add ebx,r12d
+ xor ecx,edi
+ add ebp,DWORD[r13]
+ andn edi,ebx,esi
+ add ebp,ecx
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ and ebx,edx
+ add ebp,r12d
+ xor ebx,edi
+ add eax,DWORD[4+r13]
+ andn edi,ebp,edx
+ add eax,ebx
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ and ebp,ecx
+ add eax,r12d
+ xor ebp,edi
+ add esi,DWORD[8+r13]
+ andn edi,eax,ecx
+ add esi,ebp
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ and eax,ebx
+ add esi,r12d
+ xor eax,edi
+ add edx,DWORD[12+r13]
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ xor esi,ebx
+ add ecx,DWORD[32+r13]
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ add ebx,DWORD[36+r13]
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ add ebp,DWORD[40+r13]
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ add eax,DWORD[44+r13]
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ add esi,DWORD[64+r13]
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ xor eax,ecx
+ vmovdqu ymm11,YMMWORD[((-64))+r14]
+ vpshufb ymm0,ymm0,ymm6
+ add edx,DWORD[68+r13]
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ xor esi,ebx
+ add ecx,DWORD[72+r13]
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ add ebx,DWORD[76+r13]
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ add ebp,DWORD[96+r13]
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ add eax,DWORD[100+r13]
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ vpshufb ymm1,ymm1,ymm6
+ vpaddd ymm8,ymm0,ymm11
+ add esi,DWORD[104+r13]
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ xor eax,ecx
+ add edx,DWORD[108+r13]
+ lea r13,[256+r13]
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ xor esi,ebx
+ add ecx,DWORD[((-128))+r13]
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ add ebx,DWORD[((-124))+r13]
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ add ebp,DWORD[((-120))+r13]
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ vmovdqu YMMWORD[rsp],ymm8
+ vpshufb ymm2,ymm2,ymm6
+ vpaddd ymm9,ymm1,ymm11
+ add eax,DWORD[((-116))+r13]
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ add esi,DWORD[((-96))+r13]
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ xor eax,ecx
+ add edx,DWORD[((-92))+r13]
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ xor esi,ebx
+ add ecx,DWORD[((-88))+r13]
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ add ebx,DWORD[((-84))+r13]
+ mov edi,esi
+ xor edi,eax
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ and ecx,edi
+ vmovdqu YMMWORD[32+rsp],ymm9
+ vpshufb ymm3,ymm3,ymm6
+ vpaddd ymm6,ymm2,ymm11
+ add ebp,DWORD[((-64))+r13]
+ xor ecx,esi
+ mov edi,edx
+ xor edi,esi
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ and ebx,edi
+ add eax,DWORD[((-60))+r13]
+ xor ebx,edx
+ mov edi,ecx
+ xor edi,edx
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ and ebp,edi
+ add esi,DWORD[((-56))+r13]
+ xor ebp,ecx
+ mov edi,ebx
+ xor edi,ecx
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ and eax,edi
+ add edx,DWORD[((-52))+r13]
+ xor eax,ebx
+ mov edi,ebp
+ xor edi,ebx
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ and esi,edi
+ add ecx,DWORD[((-32))+r13]
+ xor esi,ebp
+ mov edi,eax
+ xor edi,ebp
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ and edx,edi
+ jmp NEAR $L$align32_3
+ALIGN 32
+$L$align32_3:
+ vmovdqu YMMWORD[64+rsp],ymm6
+ vpaddd ymm7,ymm3,ymm11
+ add ebx,DWORD[((-28))+r13]
+ xor edx,eax
+ mov edi,esi
+ xor edi,eax
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ and ecx,edi
+ add ebp,DWORD[((-24))+r13]
+ xor ecx,esi
+ mov edi,edx
+ xor edi,esi
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ and ebx,edi
+ add eax,DWORD[((-20))+r13]
+ xor ebx,edx
+ mov edi,ecx
+ xor edi,edx
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ and ebp,edi
+ add esi,DWORD[r13]
+ xor ebp,ecx
+ mov edi,ebx
+ xor edi,ecx
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ and eax,edi
+ add edx,DWORD[4+r13]
+ xor eax,ebx
+ mov edi,ebp
+ xor edi,ebx
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ and esi,edi
+ vmovdqu YMMWORD[96+rsp],ymm7
+ add ecx,DWORD[8+r13]
+ xor esi,ebp
+ mov edi,eax
+ xor edi,ebp
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ and edx,edi
+ add ebx,DWORD[12+r13]
+ xor edx,eax
+ mov edi,esi
+ xor edi,eax
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ and ecx,edi
+ add ebp,DWORD[32+r13]
+ xor ecx,esi
+ mov edi,edx
+ xor edi,esi
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ and ebx,edi
+ add eax,DWORD[36+r13]
+ xor ebx,edx
+ mov edi,ecx
+ xor edi,edx
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ and ebp,edi
+ add esi,DWORD[40+r13]
+ xor ebp,ecx
+ mov edi,ebx
+ xor edi,ecx
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ and eax,edi
+ vpalignr ymm4,ymm1,ymm0,8
+ add edx,DWORD[44+r13]
+ xor eax,ebx
+ mov edi,ebp
+ xor edi,ebx
+ vpsrldq ymm8,ymm3,4
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ vpxor ymm4,ymm4,ymm0
+ vpxor ymm8,ymm8,ymm2
+ xor esi,ebp
+ add edx,r12d
+ vpxor ymm4,ymm4,ymm8
+ and esi,edi
+ add ecx,DWORD[64+r13]
+ xor esi,ebp
+ mov edi,eax
+ vpsrld ymm8,ymm4,31
+ xor edi,ebp
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ vpslldq ymm10,ymm4,12
+ vpaddd ymm4,ymm4,ymm4
+ rorx esi,edx,2
+ xor edx,eax
+ vpsrld ymm9,ymm10,30
+ vpor ymm4,ymm4,ymm8
+ add ecx,r12d
+ and edx,edi
+ vpslld ymm10,ymm10,2
+ vpxor ymm4,ymm4,ymm9
+ add ebx,DWORD[68+r13]
+ xor edx,eax
+ vpxor ymm4,ymm4,ymm10
+ mov edi,esi
+ xor edi,eax
+ lea ebx,[rdx*1+rbx]
+ vpaddd ymm9,ymm4,ymm11
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ vmovdqu YMMWORD[128+rsp],ymm9
+ add ebx,r12d
+ and ecx,edi
+ add ebp,DWORD[72+r13]
+ xor ecx,esi
+ mov edi,edx
+ xor edi,esi
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ and ebx,edi
+ add eax,DWORD[76+r13]
+ xor ebx,edx
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ vpalignr ymm5,ymm2,ymm1,8
+ add esi,DWORD[96+r13]
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ vpsrldq ymm8,ymm4,4
+ xor eax,ebx
+ add esi,r12d
+ xor eax,ecx
+ vpxor ymm5,ymm5,ymm1
+ vpxor ymm8,ymm8,ymm3
+ add edx,DWORD[100+r13]
+ lea edx,[rax*1+rdx]
+ vpxor ymm5,ymm5,ymm8
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ xor esi,ebp
+ add edx,r12d
+ vpsrld ymm8,ymm5,31
+ vmovdqu ymm11,YMMWORD[((-32))+r14]
+ xor esi,ebx
+ add ecx,DWORD[104+r13]
+ lea ecx,[rsi*1+rcx]
+ vpslldq ymm10,ymm5,12
+ vpaddd ymm5,ymm5,ymm5
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ vpsrld ymm9,ymm10,30
+ vpor ymm5,ymm5,ymm8
+ xor edx,eax
+ add ecx,r12d
+ vpslld ymm10,ymm10,2
+ vpxor ymm5,ymm5,ymm9
+ xor edx,ebp
+ add ebx,DWORD[108+r13]
+ lea r13,[256+r13]
+ vpxor ymm5,ymm5,ymm10
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ vpaddd ymm9,ymm5,ymm11
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ vmovdqu YMMWORD[160+rsp],ymm9
+ add ebp,DWORD[((-128))+r13]
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ vpalignr ymm6,ymm3,ymm2,8
+ add eax,DWORD[((-124))+r13]
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ vpsrldq ymm8,ymm5,4
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ vpxor ymm6,ymm6,ymm2
+ vpxor ymm8,ymm8,ymm4
+ add esi,DWORD[((-120))+r13]
+ lea esi,[rbp*1+rsi]
+ vpxor ymm6,ymm6,ymm8
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ vpsrld ymm8,ymm6,31
+ xor eax,ecx
+ add edx,DWORD[((-116))+r13]
+ lea edx,[rax*1+rdx]
+ vpslldq ymm10,ymm6,12
+ vpaddd ymm6,ymm6,ymm6
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ vpsrld ymm9,ymm10,30
+ vpor ymm6,ymm6,ymm8
+ xor esi,ebp
+ add edx,r12d
+ vpslld ymm10,ymm10,2
+ vpxor ymm6,ymm6,ymm9
+ xor esi,ebx
+ add ecx,DWORD[((-96))+r13]
+ vpxor ymm6,ymm6,ymm10
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ vpaddd ymm9,ymm6,ymm11
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ vmovdqu YMMWORD[192+rsp],ymm9
+ add ebx,DWORD[((-92))+r13]
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ vpalignr ymm7,ymm4,ymm3,8
+ add ebp,DWORD[((-88))+r13]
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ vpsrldq ymm8,ymm6,4
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ vpxor ymm7,ymm7,ymm3
+ vpxor ymm8,ymm8,ymm5
+ add eax,DWORD[((-84))+r13]
+ lea eax,[rbx*1+rax]
+ vpxor ymm7,ymm7,ymm8
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ vpsrld ymm8,ymm7,31
+ xor ebp,edx
+ add esi,DWORD[((-64))+r13]
+ lea esi,[rbp*1+rsi]
+ vpslldq ymm10,ymm7,12
+ vpaddd ymm7,ymm7,ymm7
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ vpsrld ymm9,ymm10,30
+ vpor ymm7,ymm7,ymm8
+ xor eax,ebx
+ add esi,r12d
+ vpslld ymm10,ymm10,2
+ vpxor ymm7,ymm7,ymm9
+ xor eax,ecx
+ add edx,DWORD[((-60))+r13]
+ vpxor ymm7,ymm7,ymm10
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ rorx eax,esi,2
+ vpaddd ymm9,ymm7,ymm11
+ xor esi,ebp
+ add edx,r12d
+ xor esi,ebx
+ vmovdqu YMMWORD[224+rsp],ymm9
+ add ecx,DWORD[((-56))+r13]
+ lea ecx,[rsi*1+rcx]
+ rorx r12d,edx,27
+ rorx esi,edx,2
+ xor edx,eax
+ add ecx,r12d
+ xor edx,ebp
+ add ebx,DWORD[((-52))+r13]
+ lea ebx,[rdx*1+rbx]
+ rorx r12d,ecx,27
+ rorx edx,ecx,2
+ xor ecx,esi
+ add ebx,r12d
+ xor ecx,eax
+ add ebp,DWORD[((-32))+r13]
+ lea ebp,[rbp*1+rcx]
+ rorx r12d,ebx,27
+ rorx ecx,ebx,2
+ xor ebx,edx
+ add ebp,r12d
+ xor ebx,esi
+ add eax,DWORD[((-28))+r13]
+ lea eax,[rbx*1+rax]
+ rorx r12d,ebp,27
+ rorx ebx,ebp,2
+ xor ebp,ecx
+ add eax,r12d
+ xor ebp,edx
+ add esi,DWORD[((-24))+r13]
+ lea esi,[rbp*1+rsi]
+ rorx r12d,eax,27
+ rorx ebp,eax,2
+ xor eax,ebx
+ add esi,r12d
+ xor eax,ecx
+ add edx,DWORD[((-20))+r13]
+ lea edx,[rax*1+rdx]
+ rorx r12d,esi,27
+ add edx,r12d
+ lea r13,[128+rsp]
+
+
+ add edx,DWORD[r8]
+ add esi,DWORD[4+r8]
+ add ebp,DWORD[8+r8]
+ mov DWORD[r8],edx
+ add ebx,DWORD[12+r8]
+ mov DWORD[4+r8],esi
+ mov eax,edx
+ add ecx,DWORD[16+r8]
+ mov r12d,ebp
+ mov DWORD[8+r8],ebp
+ mov edx,ebx
+
+ mov DWORD[12+r8],ebx
+ mov ebp,esi
+ mov DWORD[16+r8],ecx
+
+ mov esi,ecx
+ mov ecx,r12d
+
+
+ cmp r9,r10
+ jbe NEAR $L$oop_avx2
+
+$L$done_avx2:
+ vzeroupper
+ movaps xmm6,XMMWORD[((-40-96))+r11]
+ movaps xmm7,XMMWORD[((-40-80))+r11]
+ movaps xmm8,XMMWORD[((-40-64))+r11]
+ movaps xmm9,XMMWORD[((-40-48))+r11]
+ movaps xmm10,XMMWORD[((-40-32))+r11]
+ movaps xmm11,XMMWORD[((-40-16))+r11]
+ mov r14,QWORD[((-40))+r11]
+
+ mov r13,QWORD[((-32))+r11]
+
+ mov r12,QWORD[((-24))+r11]
+
+ mov rbp,QWORD[((-16))+r11]
+
+ mov rbx,QWORD[((-8))+r11]
+
+ lea rsp,[r11]
+
+$L$epilogue_avx2:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_sha1_block_data_order_avx2:
ALIGN 64
K_XX_XX:
DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999
@@ -3702,6 +5617,38 @@ se_handler:
ALIGN 16
shaext_handler:                 ; Win64 SEH handler for the SHA-NI (shaext) path; r8 = CONTEXT record
	push	rsi             ; save registers the handler itself uses
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64

	mov	rax,QWORD[120+r8]       ; context->Rax (CONTEXT offset 120) — NOTE(review): presumably holds the function's original rsp, saved by the shaext prologue; confirm against _shaext_shortcut
	mov	rbx,QWORD[248+r8]       ; context->Rip (CONTEXT offset 248) = faulting address

	lea	r10,[$L$prologue_shaext]
	cmp	rbx,r10                 ; fault before prologue completed?
	jb	NEAR $L$common_seh_tail ; nothing saved yet — nothing to restore

	lea	r10,[$L$epilogue_shaext]
	cmp	rbx,r10                 ; fault at/after epilogue?
	jae	NEAR $L$common_seh_tail ; frame already torn down

	lea	rsi,[((-8-64))+rax]     ; source: 64-byte xmm save area below the original rsp
	lea	rdi,[512+r8]            ; dest: context->Xmm6 (CONTEXT offset 0x200)
	mov	ecx,8                   ; 8 qwords = 64 bytes = four xmm registers
	DD	0xa548f3fc              ; encodes cld; rep movsq — copy saved xmm6..xmm9 into CONTEXT

	jmp	NEAR $L$common_seh_tail ; common tail restores the pushed regs above (defined outside this chunk)
+
+
+ALIGN 16
ssse3_handler:
push rsi
push rdi
@@ -3793,17 +5740,26 @@ ALIGN 4
DD $L$SEH_begin_sha1_block_data_order wrt ..imagebase
DD $L$SEH_end_sha1_block_data_order wrt ..imagebase
DD $L$SEH_info_sha1_block_data_order wrt ..imagebase
+ DD $L$SEH_begin_sha1_block_data_order_shaext wrt ..imagebase
+ DD $L$SEH_end_sha1_block_data_order_shaext wrt ..imagebase
+ DD $L$SEH_info_sha1_block_data_order_shaext wrt ..imagebase
DD $L$SEH_begin_sha1_block_data_order_ssse3 wrt ..imagebase
DD $L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase
DD $L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase
DD $L$SEH_begin_sha1_block_data_order_avx wrt ..imagebase
DD $L$SEH_end_sha1_block_data_order_avx wrt ..imagebase
DD $L$SEH_info_sha1_block_data_order_avx wrt ..imagebase
+ DD $L$SEH_begin_sha1_block_data_order_avx2 wrt ..imagebase
+ DD $L$SEH_end_sha1_block_data_order_avx2 wrt ..imagebase
+ DD $L$SEH_info_sha1_block_data_order_avx2 wrt ..imagebase
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_sha1_block_data_order:
DB 9,0,0,0
DD se_handler wrt ..imagebase
+$L$SEH_info_sha1_block_data_order_shaext:
+DB 9,0,0,0                              ; UNWIND_INFO header: Version=1, Flags=UNW_FLAG_EHANDLER, SizeOfProlog=0, CountOfCodes=0
+ DD shaext_handler wrt ..imagebase      ; image-relative address of the language-specific handler
$L$SEH_info_sha1_block_data_order_ssse3:
DB 9,0,0,0
DD ssse3_handler wrt ..imagebase
@@ -3812,3 +5768,7 @@ $L$SEH_info_sha1_block_data_order_avx:
DB 9,0,0,0
DD ssse3_handler wrt ..imagebase
DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
+$L$SEH_info_sha1_block_data_order_avx2:
+DB 9,0,0,0                              ; UNWIND_INFO header: Version=1, Flags=UNW_FLAG_EHANDLER, SizeOfProlog=0, CountOfCodes=0
+ DD ssse3_handler wrt ..imagebase       ; avx2 path reuses ssse3_handler (same frame layout convention)
+ DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase ; handler-specific data: [prologue,epilogue) range the handler checks