diff options
author | Pete Bentley <prb@google.com> | 2020-08-27 15:48:43 +0000 |
---|---|---|
committer | Pete Bentley <prb@google.com> | 2020-08-27 15:59:16 +0000 |
commit | 5c2e48f59b14ad8a02c2b8bf14e91aa709813468 (patch) | |
tree | 4af9c3539d390b4449e38724bbc16228e5f59b95 /win-x86_64 | |
parent | c960c43412e0632abb712fc465e70b6dfa2e9657 (diff) | |
download | boringssl-5c2e48f59b14ad8a02c2b8bf14e91aa709813468.tar.gz |
Revert "external/boringssl: Sync to a0b49d63fdc33e54eac93674c86891d15d181d87."
This reverts commit c960c43412e0632abb712fc465e70b6dfa2e9657.
Reason for revert: <Breaks ART buildbot>
Bug: 166619732
Change-Id: I5c843090e06a33aca391cde9c2da2bcb16995f05
Diffstat (limited to 'win-x86_64')
-rw-r--r-- | win-x86_64/crypto/fipsmodule/ghash-x86_64.asm | 2 | ||||
-rw-r--r-- | win-x86_64/crypto/fipsmodule/sha1-x86_64.asm | 1960 |
2 files changed, 2 insertions, 1960 deletions
diff --git a/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm b/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm index 194ea8df..c8742a4f 100644 --- a/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm +++ b/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm @@ -1177,6 +1177,8 @@ $L$0x1c2_polynomial: DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 $L$7_mask: DD 7,0,7,0 +$L$7_mask_poly: + DD 7,0,450,0 ALIGN 64 DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52 diff --git a/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm b/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm index 1654df1d..62dcc62c 100644 --- a/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm +++ b/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm @@ -33,11 +33,6 @@ $L$SEH_begin_sha1_block_data_order: mov r10d,DWORD[8+r10] test r8d,512 jz NEAR $L$ialu - test r10d,536870912 - jnz NEAR _shaext_shortcut - and r10d,296 - cmp r10d,296 - je NEAR _avx2_shortcut and r8d,268435456 and r9d,1073741824 or r8d,r9d @@ -1280,198 +1275,6 @@ $L$epilogue: $L$SEH_end_sha1_block_data_order: -ALIGN 32 -sha1_block_data_order_shaext: - mov QWORD[8+rsp],rdi ;WIN64 prologue - mov QWORD[16+rsp],rsi - mov rax,rsp -$L$SEH_begin_sha1_block_data_order_shaext: - mov rdi,rcx - mov rsi,rdx - mov rdx,r8 - - -_shaext_shortcut: - - lea rsp,[((-72))+rsp] - movaps XMMWORD[(-8-64)+rax],xmm6 - movaps XMMWORD[(-8-48)+rax],xmm7 - movaps XMMWORD[(-8-32)+rax],xmm8 - movaps XMMWORD[(-8-16)+rax],xmm9 -$L$prologue_shaext: - movdqu xmm0,XMMWORD[rdi] - movd xmm1,DWORD[16+rdi] - movdqa xmm3,XMMWORD[((K_XX_XX+160))] - - movdqu xmm4,XMMWORD[rsi] - pshufd xmm0,xmm0,27 - movdqu xmm5,XMMWORD[16+rsi] - pshufd xmm1,xmm1,27 - movdqu xmm6,XMMWORD[32+rsi] -DB 102,15,56,0,227 - movdqu xmm7,XMMWORD[48+rsi] -DB 102,15,56,0,235 -DB 102,15,56,0,243 - movdqa xmm9,xmm1 -DB 102,15,56,0,251 - jmp NEAR $L$oop_shaext - -ALIGN 16 -$L$oop_shaext: - dec rdx - lea r8,[64+rsi] - paddd xmm1,xmm4 - cmovne rsi,r8 - movdqa xmm8,xmm0 -DB 15,56,201,229 - movdqa xmm2,xmm0 -DB 15,58,204,193,0 -DB 15,56,200,213 - pxor xmm4,xmm6 -DB 15,56,201,238 -DB 15,56,202,231 - - movdqa xmm1,xmm0 -DB 15,58,204,194,0 -DB 15,56,200,206 - pxor xmm5,xmm7 -DB 15,56,202,236 -DB 15,56,201,247 - movdqa xmm2,xmm0 -DB 15,58,204,193,0 -DB 15,56,200,215 - pxor xmm6,xmm4 -DB 15,56,201,252 -DB 15,56,202,245 - - movdqa xmm1,xmm0 -DB 15,58,204,194,0 -DB 15,56,200,204 - pxor xmm7,xmm5 -DB 15,56,202,254 -DB 15,56,201,229 - movdqa xmm2,xmm0 -DB 15,58,204,193,0 -DB 15,56,200,213 - pxor xmm4,xmm6 -DB 15,56,201,238 -DB 15,56,202,231 - - movdqa xmm1,xmm0 -DB 15,58,204,194,1 -DB 15,56,200,206 - pxor xmm5,xmm7 -DB 15,56,202,236 -DB 15,56,201,247 - movdqa xmm2,xmm0 -DB 15,58,204,193,1 -DB 15,56,200,215 - pxor xmm6,xmm4 -DB 15,56,201,252 -DB 15,56,202,245 - - movdqa xmm1,xmm0 -DB 15,58,204,194,1 -DB 15,56,200,204 - pxor xmm7,xmm5 -DB 15,56,202,254 -DB 15,56,201,229 - movdqa xmm2,xmm0 -DB 15,58,204,193,1 -DB 15,56,200,213 - pxor xmm4,xmm6 -DB 15,56,201,238 -DB 15,56,202,231 - - movdqa xmm1,xmm0 -DB 15,58,204,194,1 -DB 15,56,200,206 - pxor xmm5,xmm7 -DB 15,56,202,236 -DB 15,56,201,247 - movdqa xmm2,xmm0 -DB 15,58,204,193,2 -DB 15,56,200,215 - pxor xmm6,xmm4 -DB 15,56,201,252 -DB 15,56,202,245 - - movdqa xmm1,xmm0 -DB 15,58,204,194,2 -DB 15,56,200,204 - pxor xmm7,xmm5 -DB 15,56,202,254 -DB 15,56,201,229 - movdqa xmm2,xmm0 -DB 15,58,204,193,2 -DB 15,56,200,213 - pxor xmm4,xmm6 -DB 15,56,201,238 -DB 15,56,202,231 - - movdqa xmm1,xmm0 -DB 15,58,204,194,2 -DB 15,56,200,206 - pxor xmm5,xmm7 -DB 15,56,202,236 -DB 15,56,201,247 - movdqa xmm2,xmm0 -DB 15,58,204,193,2 -DB 15,56,200,215 - pxor xmm6,xmm4 -DB 15,56,201,252 -DB 15,56,202,245 - - movdqa xmm1,xmm0 -DB 15,58,204,194,3 -DB 15,56,200,204 - pxor xmm7,xmm5 -DB 15,56,202,254 - movdqu xmm4,XMMWORD[rsi] - movdqa xmm2,xmm0 -DB 15,58,204,193,3 -DB 15,56,200,213 - movdqu xmm5,XMMWORD[16+rsi] -DB 102,15,56,0,227 - - movdqa xmm1,xmm0 -DB 15,58,204,194,3 -DB 15,56,200,206 - movdqu xmm6,XMMWORD[32+rsi] -DB 102,15,56,0,235 - - movdqa xmm2,xmm0 -DB 15,58,204,193,3 -DB 15,56,200,215 - movdqu xmm7,XMMWORD[48+rsi] -DB 102,15,56,0,243 - - movdqa xmm1,xmm0 -DB 15,58,204,194,3 -DB 65,15,56,200,201 -DB 102,15,56,0,251 - - paddd xmm0,xmm8 - movdqa xmm9,xmm1 - - jnz NEAR $L$oop_shaext - - pshufd xmm0,xmm0,27 - pshufd xmm1,xmm1,27 - movdqu XMMWORD[rdi],xmm0 - movd DWORD[16+rdi],xmm1 - movaps xmm6,XMMWORD[((-8-64))+rax] - movaps xmm7,XMMWORD[((-8-48))+rax] - movaps xmm8,XMMWORD[((-8-32))+rax] - movaps xmm9,XMMWORD[((-8-16))+rax] - mov rsp,rax -$L$epilogue_shaext: - mov rdi,QWORD[8+rsp] ;WIN64 epilogue - mov rsi,QWORD[16+rsp] - DB 0F3h,0C3h ;repret - -$L$SEH_end_sha1_block_data_order_shaext: - ALIGN 16 sha1_block_data_order_ssse3: mov QWORD[8+rsp],rdi ;WIN64 prologue @@ -3835,1724 +3638,6 @@ $L$epilogue_avx: DB 0F3h,0C3h ;repret $L$SEH_end_sha1_block_data_order_avx: - -ALIGN 16 -sha1_block_data_order_avx2: - mov QWORD[8+rsp],rdi ;WIN64 prologue - mov QWORD[16+rsp],rsi - mov rax,rsp -$L$SEH_begin_sha1_block_data_order_avx2: - mov rdi,rcx - mov rsi,rdx - mov rdx,r8 - - -_avx2_shortcut: - - mov r11,rsp - - push rbx - - push rbp - - push r12 - - push r13 - - push r14 - - vzeroupper - lea rsp,[((-96))+rsp] - vmovaps XMMWORD[(-40-96)+r11],xmm6 - vmovaps XMMWORD[(-40-80)+r11],xmm7 - vmovaps XMMWORD[(-40-64)+r11],xmm8 - vmovaps XMMWORD[(-40-48)+r11],xmm9 - vmovaps XMMWORD[(-40-32)+r11],xmm10 - vmovaps XMMWORD[(-40-16)+r11],xmm11 -$L$prologue_avx2: - mov r8,rdi - mov r9,rsi - mov r10,rdx - - lea rsp,[((-640))+rsp] - shl r10,6 - lea r13,[64+r9] - and rsp,-128 - add r10,r9 - lea r14,[((K_XX_XX+64))] - - mov eax,DWORD[r8] - cmp r13,r10 - cmovae r13,r9 - mov ebp,DWORD[4+r8] - mov ecx,DWORD[8+r8] - mov edx,DWORD[12+r8] - mov esi,DWORD[16+r8] - vmovdqu ymm6,YMMWORD[64+r14] - - vmovdqu xmm0,XMMWORD[r9] - vmovdqu xmm1,XMMWORD[16+r9] - vmovdqu xmm2,XMMWORD[32+r9] - vmovdqu xmm3,XMMWORD[48+r9] - lea r9,[64+r9] - vinserti128 ymm0,ymm0,XMMWORD[r13],1 - vinserti128 ymm1,ymm1,XMMWORD[16+r13],1 - vpshufb ymm0,ymm0,ymm6 - vinserti128 ymm2,ymm2,XMMWORD[32+r13],1 - vpshufb ymm1,ymm1,ymm6 - vinserti128 ymm3,ymm3,XMMWORD[48+r13],1 - vpshufb ymm2,ymm2,ymm6 - vmovdqu ymm11,YMMWORD[((-64))+r14] - vpshufb ymm3,ymm3,ymm6 - - vpaddd ymm4,ymm0,ymm11 - vpaddd ymm5,ymm1,ymm11 - vmovdqu YMMWORD[rsp],ymm4 - vpaddd ymm6,ymm2,ymm11 - vmovdqu YMMWORD[32+rsp],ymm5 - vpaddd ymm7,ymm3,ymm11 - vmovdqu YMMWORD[64+rsp],ymm6 - vmovdqu YMMWORD[96+rsp],ymm7 - vpalignr ymm4,ymm1,ymm0,8 - vpsrldq ymm8,ymm3,4 - vpxor ymm4,ymm4,ymm0 - vpxor ymm8,ymm8,ymm2 - vpxor ymm4,ymm4,ymm8 - vpsrld ymm8,ymm4,31 - vpslldq ymm10,ymm4,12 - vpaddd ymm4,ymm4,ymm4 - vpsrld ymm9,ymm10,30 - vpor ymm4,ymm4,ymm8 - vpslld ymm10,ymm10,2 - vpxor ymm4,ymm4,ymm9 - vpxor ymm4,ymm4,ymm10 - vpaddd ymm9,ymm4,ymm11 - vmovdqu YMMWORD[128+rsp],ymm9 - vpalignr ymm5,ymm2,ymm1,8 - vpsrldq ymm8,ymm4,4 - vpxor ymm5,ymm5,ymm1 - vpxor ymm8,ymm8,ymm3 - vpxor ymm5,ymm5,ymm8 - vpsrld ymm8,ymm5,31 - vmovdqu ymm11,YMMWORD[((-32))+r14] - vpslldq ymm10,ymm5,12 - vpaddd ymm5,ymm5,ymm5 - vpsrld ymm9,ymm10,30 - vpor ymm5,ymm5,ymm8 - vpslld ymm10,ymm10,2 - vpxor ymm5,ymm5,ymm9 - vpxor ymm5,ymm5,ymm10 - vpaddd ymm9,ymm5,ymm11 - vmovdqu YMMWORD[160+rsp],ymm9 - vpalignr ymm6,ymm3,ymm2,8 - vpsrldq ymm8,ymm5,4 - vpxor ymm6,ymm6,ymm2 - vpxor ymm8,ymm8,ymm4 - vpxor ymm6,ymm6,ymm8 - vpsrld ymm8,ymm6,31 - vpslldq ymm10,ymm6,12 - vpaddd ymm6,ymm6,ymm6 - vpsrld ymm9,ymm10,30 - vpor ymm6,ymm6,ymm8 - vpslld ymm10,ymm10,2 - vpxor ymm6,ymm6,ymm9 - vpxor ymm6,ymm6,ymm10 - vpaddd ymm9,ymm6,ymm11 - vmovdqu YMMWORD[192+rsp],ymm9 - vpalignr ymm7,ymm4,ymm3,8 - vpsrldq ymm8,ymm6,4 - vpxor ymm7,ymm7,ymm3 - vpxor ymm8,ymm8,ymm5 - vpxor ymm7,ymm7,ymm8 - vpsrld ymm8,ymm7,31 - vpslldq ymm10,ymm7,12 - vpaddd ymm7,ymm7,ymm7 - vpsrld ymm9,ymm10,30 - vpor ymm7,ymm7,ymm8 - vpslld ymm10,ymm10,2 - vpxor ymm7,ymm7,ymm9 - vpxor ymm7,ymm7,ymm10 - vpaddd ymm9,ymm7,ymm11 - vmovdqu YMMWORD[224+rsp],ymm9 - lea r13,[128+rsp] - jmp NEAR $L$oop_avx2 -ALIGN 32 -$L$oop_avx2: - rorx ebx,ebp,2 - andn edi,ebp,edx - and ebp,ecx - xor ebp,edi - jmp NEAR $L$align32_1 -ALIGN 32 -$L$align32_1: - vpalignr ymm8,ymm7,ymm6,8 - vpxor ymm0,ymm0,ymm4 - add esi,DWORD[((-128))+r13] - andn edi,eax,ecx - vpxor ymm0,ymm0,ymm1 - add esi,ebp - rorx r12d,eax,27 - rorx ebp,eax,2 - vpxor ymm0,ymm0,ymm8 - and eax,ebx - add esi,r12d - xor eax,edi - vpsrld ymm8,ymm0,30 - vpslld ymm0,ymm0,2 - add edx,DWORD[((-124))+r13] - andn edi,esi,ebx - add edx,eax - rorx r12d,esi,27 - rorx eax,esi,2 - and esi,ebp - vpor ymm0,ymm0,ymm8 - add edx,r12d - xor esi,edi - add ecx,DWORD[((-120))+r13] - andn edi,edx,ebp - vpaddd ymm9,ymm0,ymm11 - add ecx,esi - rorx r12d,edx,27 - rorx esi,edx,2 - and edx,eax - vmovdqu YMMWORD[256+rsp],ymm9 - add ecx,r12d - xor edx,edi - add ebx,DWORD[((-116))+r13] - andn edi,ecx,eax - add ebx,edx - rorx r12d,ecx,27 - rorx edx,ecx,2 - and ecx,esi - add ebx,r12d - xor ecx,edi - add ebp,DWORD[((-96))+r13] - andn edi,ebx,esi - add ebp,ecx - rorx r12d,ebx,27 - rorx ecx,ebx,2 - and ebx,edx - add ebp,r12d - xor ebx,edi - vpalignr ymm8,ymm0,ymm7,8 - vpxor ymm1,ymm1,ymm5 - add eax,DWORD[((-92))+r13] - andn edi,ebp,edx - vpxor ymm1,ymm1,ymm2 - add eax,ebx - rorx r12d,ebp,27 - rorx ebx,ebp,2 - vpxor ymm1,ymm1,ymm8 - and ebp,ecx - add eax,r12d - xor ebp,edi - vpsrld ymm8,ymm1,30 - vpslld ymm1,ymm1,2 - add esi,DWORD[((-88))+r13] - andn edi,eax,ecx - add esi,ebp - rorx r12d,eax,27 - rorx ebp,eax,2 - and eax,ebx - vpor ymm1,ymm1,ymm8 - add esi,r12d - xor eax,edi - add edx,DWORD[((-84))+r13] - andn edi,esi,ebx - vpaddd ymm9,ymm1,ymm11 - add edx,eax - rorx r12d,esi,27 - rorx eax,esi,2 - and esi,ebp - vmovdqu YMMWORD[288+rsp],ymm9 - add edx,r12d - xor esi,edi - add ecx,DWORD[((-64))+r13] - andn edi,edx,ebp - add ecx,esi - rorx r12d,edx,27 - rorx esi,edx,2 - and edx,eax - add ecx,r12d - xor edx,edi - add ebx,DWORD[((-60))+r13] - andn edi,ecx,eax - add ebx,edx - rorx r12d,ecx,27 - rorx edx,ecx,2 - and ecx,esi - add ebx,r12d - xor ecx,edi - vpalignr ymm8,ymm1,ymm0,8 - vpxor ymm2,ymm2,ymm6 - add ebp,DWORD[((-56))+r13] - andn edi,ebx,esi - vpxor ymm2,ymm2,ymm3 - vmovdqu ymm11,YMMWORD[r14] - add ebp,ecx - rorx r12d,ebx,27 - rorx ecx,ebx,2 - vpxor ymm2,ymm2,ymm8 - and ebx,edx - add ebp,r12d - xor ebx,edi - vpsrld ymm8,ymm2,30 - vpslld ymm2,ymm2,2 - add eax,DWORD[((-52))+r13] - andn edi,ebp,edx - add eax,ebx - rorx r12d,ebp,27 - rorx ebx,ebp,2 - and ebp,ecx - vpor ymm2,ymm2,ymm8 - add eax,r12d - xor ebp,edi - add esi,DWORD[((-32))+r13] - andn edi,eax,ecx - vpaddd ymm9,ymm2,ymm11 - add esi,ebp - rorx r12d,eax,27 - rorx ebp,eax,2 - and eax,ebx - vmovdqu YMMWORD[320+rsp],ymm9 - add esi,r12d - xor eax,edi - add edx,DWORD[((-28))+r13] - andn edi,esi,ebx - add edx,eax - rorx r12d,esi,27 - rorx eax,esi,2 - and esi,ebp - add edx,r12d - xor esi,edi - add ecx,DWORD[((-24))+r13] - andn edi,edx,ebp - add ecx,esi - rorx r12d,edx,27 - rorx esi,edx,2 - and edx,eax - add ecx,r12d - xor edx,edi - vpalignr ymm8,ymm2,ymm1,8 - vpxor ymm3,ymm3,ymm7 - add ebx,DWORD[((-20))+r13] - andn edi,ecx,eax - vpxor ymm3,ymm3,ymm4 - add ebx,edx - rorx r12d,ecx,27 - rorx edx,ecx,2 - vpxor ymm3,ymm3,ymm8 - and ecx,esi - add ebx,r12d - xor ecx,edi - vpsrld ymm8,ymm3,30 - vpslld ymm3,ymm3,2 - add ebp,DWORD[r13] - andn edi,ebx,esi - add ebp,ecx - rorx r12d,ebx,27 - rorx ecx,ebx,2 - and ebx,edx - vpor ymm3,ymm3,ymm8 - add ebp,r12d - xor ebx,edi - add eax,DWORD[4+r13] - andn edi,ebp,edx - vpaddd ymm9,ymm3,ymm11 - add eax,ebx - rorx r12d,ebp,27 - rorx ebx,ebp,2 - and ebp,ecx - vmovdqu YMMWORD[352+rsp],ymm9 - add eax,r12d - xor ebp,edi - add esi,DWORD[8+r13] - andn edi,eax,ecx - add esi,ebp - rorx r12d,eax,27 - rorx ebp,eax,2 - and eax,ebx - add esi,r12d - xor eax,edi - add edx,DWORD[12+r13] - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - xor esi,ebx - vpalignr ymm8,ymm3,ymm2,8 - vpxor ymm4,ymm4,ymm0 - add ecx,DWORD[32+r13] - lea ecx,[rsi*1+rcx] - vpxor ymm4,ymm4,ymm5 - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - vpxor ymm4,ymm4,ymm8 - add ecx,r12d - xor edx,ebp - add ebx,DWORD[36+r13] - vpsrld ymm8,ymm4,30 - vpslld ymm4,ymm4,2 - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - xor ecx,eax - vpor ymm4,ymm4,ymm8 - add ebp,DWORD[40+r13] - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - vpaddd ymm9,ymm4,ymm11 - xor ebx,edx - add ebp,r12d - xor ebx,esi - add eax,DWORD[44+r13] - vmovdqu YMMWORD[384+rsp],ymm9 - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - xor ebp,edx - add esi,DWORD[64+r13] - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - xor eax,ecx - vpalignr ymm8,ymm4,ymm3,8 - vpxor ymm5,ymm5,ymm1 - add edx,DWORD[68+r13] - lea edx,[rax*1+rdx] - vpxor ymm5,ymm5,ymm6 - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - vpxor ymm5,ymm5,ymm8 - add edx,r12d - xor esi,ebx - add ecx,DWORD[72+r13] - vpsrld ymm8,ymm5,30 - vpslld ymm5,ymm5,2 - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - xor edx,ebp - vpor ymm5,ymm5,ymm8 - add ebx,DWORD[76+r13] - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - vpaddd ymm9,ymm5,ymm11 - xor ecx,esi - add ebx,r12d - xor ecx,eax - add ebp,DWORD[96+r13] - vmovdqu YMMWORD[416+rsp],ymm9 - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - xor ebx,esi - add eax,DWORD[100+r13] - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - xor ebp,edx - vpalignr ymm8,ymm5,ymm4,8 - vpxor ymm6,ymm6,ymm2 - add esi,DWORD[104+r13] - lea esi,[rbp*1+rsi] - vpxor ymm6,ymm6,ymm7 - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - vpxor ymm6,ymm6,ymm8 - add esi,r12d - xor eax,ecx - add edx,DWORD[108+r13] - lea r13,[256+r13] - vpsrld ymm8,ymm6,30 - vpslld ymm6,ymm6,2 - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - xor esi,ebx - vpor ymm6,ymm6,ymm8 - add ecx,DWORD[((-128))+r13] - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - vpaddd ymm9,ymm6,ymm11 - xor edx,eax - add ecx,r12d - xor edx,ebp - add ebx,DWORD[((-124))+r13] - vmovdqu YMMWORD[448+rsp],ymm9 - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - xor ecx,eax - add ebp,DWORD[((-120))+r13] - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - xor ebx,esi - vpalignr ymm8,ymm6,ymm5,8 - vpxor ymm7,ymm7,ymm3 - add eax,DWORD[((-116))+r13] - lea eax,[rbx*1+rax] - vpxor ymm7,ymm7,ymm0 - vmovdqu ymm11,YMMWORD[32+r14] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - vpxor ymm7,ymm7,ymm8 - add eax,r12d - xor ebp,edx - add esi,DWORD[((-96))+r13] - vpsrld ymm8,ymm7,30 - vpslld ymm7,ymm7,2 - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - xor eax,ecx - vpor ymm7,ymm7,ymm8 - add edx,DWORD[((-92))+r13] - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - vpaddd ymm9,ymm7,ymm11 - xor esi,ebp - add edx,r12d - xor esi,ebx - add ecx,DWORD[((-88))+r13] - vmovdqu YMMWORD[480+rsp],ymm9 - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - xor edx,ebp - add ebx,DWORD[((-84))+r13] - mov edi,esi - xor edi,eax - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - and ecx,edi - jmp NEAR $L$align32_2 -ALIGN 32 -$L$align32_2: - vpalignr ymm8,ymm7,ymm6,8 - vpxor ymm0,ymm0,ymm4 - add ebp,DWORD[((-64))+r13] - xor ecx,esi - vpxor ymm0,ymm0,ymm1 - mov edi,edx - xor edi,esi - lea ebp,[rbp*1+rcx] - vpxor ymm0,ymm0,ymm8 - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - vpsrld ymm8,ymm0,30 - vpslld ymm0,ymm0,2 - add ebp,r12d - and ebx,edi - add eax,DWORD[((-60))+r13] - xor ebx,edx - mov edi,ecx - xor edi,edx - vpor ymm0,ymm0,ymm8 - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - vpaddd ymm9,ymm0,ymm11 - add eax,r12d - and ebp,edi - add esi,DWORD[((-56))+r13] - xor ebp,ecx - vmovdqu YMMWORD[512+rsp],ymm9 - mov edi,ebx - xor edi,ecx - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - and eax,edi - add edx,DWORD[((-52))+r13] - xor eax,ebx - mov edi,ebp - xor edi,ebx - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - and esi,edi - add ecx,DWORD[((-32))+r13] - xor esi,ebp - mov edi,eax - xor edi,ebp - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - and edx,edi - vpalignr ymm8,ymm0,ymm7,8 - vpxor ymm1,ymm1,ymm5 - add ebx,DWORD[((-28))+r13] - xor edx,eax - vpxor ymm1,ymm1,ymm2 - mov edi,esi - xor edi,eax - lea ebx,[rdx*1+rbx] - vpxor ymm1,ymm1,ymm8 - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - vpsrld ymm8,ymm1,30 - vpslld ymm1,ymm1,2 - add ebx,r12d - and ecx,edi - add ebp,DWORD[((-24))+r13] - xor ecx,esi - mov edi,edx - xor edi,esi - vpor ymm1,ymm1,ymm8 - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - vpaddd ymm9,ymm1,ymm11 - add ebp,r12d - and ebx,edi - add eax,DWORD[((-20))+r13] - xor ebx,edx - vmovdqu YMMWORD[544+rsp],ymm9 - mov edi,ecx - xor edi,edx - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - and ebp,edi - add esi,DWORD[r13] - xor ebp,ecx - mov edi,ebx - xor edi,ecx - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - and eax,edi - add edx,DWORD[4+r13] - xor eax,ebx - mov edi,ebp - xor edi,ebx - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - and esi,edi - vpalignr ymm8,ymm1,ymm0,8 - vpxor ymm2,ymm2,ymm6 - add ecx,DWORD[8+r13] - xor esi,ebp - vpxor ymm2,ymm2,ymm3 - mov edi,eax - xor edi,ebp - lea ecx,[rsi*1+rcx] - vpxor ymm2,ymm2,ymm8 - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - vpsrld ymm8,ymm2,30 - vpslld ymm2,ymm2,2 - add ecx,r12d - and edx,edi - add ebx,DWORD[12+r13] - xor edx,eax - mov edi,esi - xor edi,eax - vpor ymm2,ymm2,ymm8 - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - vpaddd ymm9,ymm2,ymm11 - add ebx,r12d - and ecx,edi - add ebp,DWORD[32+r13] - xor ecx,esi - vmovdqu YMMWORD[576+rsp],ymm9 - mov edi,edx - xor edi,esi - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - and ebx,edi - add eax,DWORD[36+r13] - xor ebx,edx - mov edi,ecx - xor edi,edx - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - and ebp,edi - add esi,DWORD[40+r13] - xor ebp,ecx - mov edi,ebx - xor edi,ecx - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - and eax,edi - vpalignr ymm8,ymm2,ymm1,8 - vpxor ymm3,ymm3,ymm7 - add edx,DWORD[44+r13] - xor eax,ebx - vpxor ymm3,ymm3,ymm4 - mov edi,ebp - xor edi,ebx - lea edx,[rax*1+rdx] - vpxor ymm3,ymm3,ymm8 - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - vpsrld ymm8,ymm3,30 - vpslld ymm3,ymm3,2 - add edx,r12d - and esi,edi - add ecx,DWORD[64+r13] - xor esi,ebp - mov edi,eax - xor edi,ebp - vpor ymm3,ymm3,ymm8 - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - vpaddd ymm9,ymm3,ymm11 - add ecx,r12d - and edx,edi - add ebx,DWORD[68+r13] - xor edx,eax - vmovdqu YMMWORD[608+rsp],ymm9 - mov edi,esi - xor edi,eax - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - and ecx,edi - add ebp,DWORD[72+r13] - xor ecx,esi - mov edi,edx - xor edi,esi - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - and ebx,edi - add eax,DWORD[76+r13] - xor ebx,edx - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - xor ebp,edx - add esi,DWORD[96+r13] - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - xor eax,ecx - add edx,DWORD[100+r13] - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - xor esi,ebx - add ecx,DWORD[104+r13] - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - xor edx,ebp - add ebx,DWORD[108+r13] - lea r13,[256+r13] - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - xor ecx,eax - add ebp,DWORD[((-128))+r13] - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - xor ebx,esi - add eax,DWORD[((-124))+r13] - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - xor ebp,edx - add esi,DWORD[((-120))+r13] - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - xor eax,ecx - add edx,DWORD[((-116))+r13] - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - xor esi,ebx - add ecx,DWORD[((-96))+r13] - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - xor edx,ebp - add ebx,DWORD[((-92))+r13] - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - xor ecx,eax - add ebp,DWORD[((-88))+r13] - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - xor ebx,esi - add eax,DWORD[((-84))+r13] - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - xor ebp,edx - add esi,DWORD[((-64))+r13] - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - xor eax,ecx - add edx,DWORD[((-60))+r13] - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - xor esi,ebx - add ecx,DWORD[((-56))+r13] - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - xor edx,ebp - add ebx,DWORD[((-52))+r13] - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - xor ecx,eax - add ebp,DWORD[((-32))+r13] - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - xor ebx,esi - add eax,DWORD[((-28))+r13] - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - xor ebp,edx - add esi,DWORD[((-24))+r13] - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - xor eax,ecx - add edx,DWORD[((-20))+r13] - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - add edx,r12d - lea r13,[128+r9] - lea rdi,[128+r9] - cmp r13,r10 - cmovae r13,r9 - - - add edx,DWORD[r8] - add esi,DWORD[4+r8] - add ebp,DWORD[8+r8] - mov DWORD[r8],edx - add ebx,DWORD[12+r8] - mov DWORD[4+r8],esi - mov eax,edx - add ecx,DWORD[16+r8] - mov r12d,ebp - mov DWORD[8+r8],ebp - mov edx,ebx - - mov DWORD[12+r8],ebx - mov ebp,esi - mov DWORD[16+r8],ecx - - mov esi,ecx - mov ecx,r12d - - - cmp r9,r10 - je NEAR $L$done_avx2 - vmovdqu ymm6,YMMWORD[64+r14] - cmp rdi,r10 - ja NEAR $L$ast_avx2 - - vmovdqu xmm0,XMMWORD[((-64))+rdi] - vmovdqu xmm1,XMMWORD[((-48))+rdi] - vmovdqu xmm2,XMMWORD[((-32))+rdi] - vmovdqu xmm3,XMMWORD[((-16))+rdi] - vinserti128 ymm0,ymm0,XMMWORD[r13],1 - vinserti128 ymm1,ymm1,XMMWORD[16+r13],1 - vinserti128 ymm2,ymm2,XMMWORD[32+r13],1 - vinserti128 ymm3,ymm3,XMMWORD[48+r13],1 - jmp NEAR $L$ast_avx2 - -ALIGN 32 -$L$ast_avx2: - lea r13,[((128+16))+rsp] - rorx ebx,ebp,2 - andn edi,ebp,edx - and ebp,ecx - xor ebp,edi - sub r9,-128 - add esi,DWORD[((-128))+r13] - andn edi,eax,ecx - add esi,ebp - rorx r12d,eax,27 - rorx ebp,eax,2 - and eax,ebx - add esi,r12d - xor eax,edi - add edx,DWORD[((-124))+r13] - andn edi,esi,ebx - add edx,eax - rorx r12d,esi,27 - rorx eax,esi,2 - and esi,ebp - add edx,r12d - xor esi,edi - add ecx,DWORD[((-120))+r13] - andn edi,edx,ebp - add ecx,esi - rorx r12d,edx,27 - rorx esi,edx,2 - and edx,eax - add ecx,r12d - xor edx,edi - add ebx,DWORD[((-116))+r13] - andn edi,ecx,eax - add ebx,edx - rorx r12d,ecx,27 - rorx edx,ecx,2 - and ecx,esi - add ebx,r12d - xor ecx,edi - add ebp,DWORD[((-96))+r13] - andn edi,ebx,esi - add ebp,ecx - rorx r12d,ebx,27 - rorx ecx,ebx,2 - and ebx,edx - add ebp,r12d - xor ebx,edi - add eax,DWORD[((-92))+r13] - andn edi,ebp,edx - add eax,ebx - rorx r12d,ebp,27 - rorx ebx,ebp,2 - and ebp,ecx - add eax,r12d - xor ebp,edi - add esi,DWORD[((-88))+r13] - andn edi,eax,ecx - add esi,ebp - rorx r12d,eax,27 - rorx ebp,eax,2 - and eax,ebx - add esi,r12d - xor eax,edi - add edx,DWORD[((-84))+r13] - andn edi,esi,ebx - add edx,eax - rorx r12d,esi,27 - rorx eax,esi,2 - and esi,ebp - add edx,r12d - xor esi,edi - add ecx,DWORD[((-64))+r13] - andn edi,edx,ebp - add ecx,esi - rorx r12d,edx,27 - rorx esi,edx,2 - and edx,eax - add ecx,r12d - xor edx,edi - add ebx,DWORD[((-60))+r13] - andn edi,ecx,eax - add ebx,edx - rorx r12d,ecx,27 - rorx edx,ecx,2 - and ecx,esi - add ebx,r12d - xor ecx,edi - add ebp,DWORD[((-56))+r13] - andn edi,ebx,esi - add ebp,ecx - rorx r12d,ebx,27 - rorx ecx,ebx,2 - and ebx,edx - add ebp,r12d - xor ebx,edi - add eax,DWORD[((-52))+r13] - andn edi,ebp,edx - add eax,ebx - rorx r12d,ebp,27 - rorx ebx,ebp,2 - and ebp,ecx - add eax,r12d - xor ebp,edi - add esi,DWORD[((-32))+r13] - andn edi,eax,ecx - add esi,ebp - rorx r12d,eax,27 - rorx ebp,eax,2 - and eax,ebx - add esi,r12d - xor eax,edi - add edx,DWORD[((-28))+r13] - andn edi,esi,ebx - add edx,eax - rorx r12d,esi,27 - rorx eax,esi,2 - and esi,ebp - add edx,r12d - xor esi,edi - add ecx,DWORD[((-24))+r13] - andn edi,edx,ebp - add ecx,esi - rorx r12d,edx,27 - rorx esi,edx,2 - and edx,eax - add ecx,r12d - xor edx,edi - add ebx,DWORD[((-20))+r13] - andn edi,ecx,eax - add ebx,edx - rorx r12d,ecx,27 - rorx edx,ecx,2 - and ecx,esi - add ebx,r12d - xor ecx,edi - add ebp,DWORD[r13] - andn edi,ebx,esi - add ebp,ecx - rorx r12d,ebx,27 - rorx ecx,ebx,2 - and ebx,edx - add ebp,r12d - xor ebx,edi - add eax,DWORD[4+r13] - andn edi,ebp,edx - add eax,ebx - rorx r12d,ebp,27 - rorx ebx,ebp,2 - and ebp,ecx - add eax,r12d - xor ebp,edi - add esi,DWORD[8+r13] - andn edi,eax,ecx - add esi,ebp - rorx r12d,eax,27 - rorx ebp,eax,2 - and eax,ebx - add esi,r12d - xor eax,edi - add edx,DWORD[12+r13] - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - xor esi,ebx - add ecx,DWORD[32+r13] - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - xor edx,ebp - add ebx,DWORD[36+r13] - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - xor ecx,eax - add ebp,DWORD[40+r13] - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - xor ebx,esi - add eax,DWORD[44+r13] - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - xor ebp,edx - add esi,DWORD[64+r13] - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - xor eax,ecx - vmovdqu ymm11,YMMWORD[((-64))+r14] - vpshufb ymm0,ymm0,ymm6 - add edx,DWORD[68+r13] - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - xor esi,ebx - add ecx,DWORD[72+r13] - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - xor edx,ebp - add ebx,DWORD[76+r13] - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - xor ecx,eax - add ebp,DWORD[96+r13] - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - xor ebx,esi - add eax,DWORD[100+r13] - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - xor ebp,edx - vpshufb ymm1,ymm1,ymm6 - vpaddd ymm8,ymm0,ymm11 - add esi,DWORD[104+r13] - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - xor eax,ecx - add edx,DWORD[108+r13] - lea r13,[256+r13] - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - xor esi,ebx - add ecx,DWORD[((-128))+r13] - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - xor edx,ebp - add ebx,DWORD[((-124))+r13] - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - xor ecx,eax - add ebp,DWORD[((-120))+r13] - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - xor ebx,esi - vmovdqu YMMWORD[rsp],ymm8 - vpshufb ymm2,ymm2,ymm6 - vpaddd ymm9,ymm1,ymm11 - add eax,DWORD[((-116))+r13] - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - xor ebp,edx - add esi,DWORD[((-96))+r13] - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - xor eax,ecx - add edx,DWORD[((-92))+r13] - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - xor esi,ebx - add ecx,DWORD[((-88))+r13] - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - xor edx,ebp - add ebx,DWORD[((-84))+r13] - mov edi,esi - xor edi,eax - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - and ecx,edi - vmovdqu YMMWORD[32+rsp],ymm9 - vpshufb ymm3,ymm3,ymm6 - vpaddd ymm6,ymm2,ymm11 - add ebp,DWORD[((-64))+r13] - xor ecx,esi - mov edi,edx - xor edi,esi - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - and ebx,edi - add eax,DWORD[((-60))+r13] - xor ebx,edx - mov edi,ecx - xor edi,edx - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - and ebp,edi - add esi,DWORD[((-56))+r13] - xor ebp,ecx - mov edi,ebx - xor edi,ecx - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - and eax,edi - add edx,DWORD[((-52))+r13] - xor eax,ebx - mov edi,ebp - xor edi,ebx - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - and esi,edi - add ecx,DWORD[((-32))+r13] - xor esi,ebp - mov edi,eax - xor edi,ebp - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - and edx,edi - jmp NEAR $L$align32_3 -ALIGN 32 -$L$align32_3: - vmovdqu YMMWORD[64+rsp],ymm6 - vpaddd ymm7,ymm3,ymm11 - add ebx,DWORD[((-28))+r13] - xor edx,eax - mov edi,esi - xor edi,eax - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - and ecx,edi - add ebp,DWORD[((-24))+r13] - xor ecx,esi - mov edi,edx - xor edi,esi - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - and ebx,edi - add eax,DWORD[((-20))+r13] - xor ebx,edx - mov edi,ecx - xor edi,edx - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - and ebp,edi - add esi,DWORD[r13] - xor ebp,ecx - mov edi,ebx - xor edi,ecx - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - and eax,edi - add edx,DWORD[4+r13] - xor eax,ebx - mov edi,ebp - xor edi,ebx - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - and esi,edi - vmovdqu YMMWORD[96+rsp],ymm7 - add ecx,DWORD[8+r13] - xor esi,ebp - mov edi,eax - xor edi,ebp - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - and edx,edi - add ebx,DWORD[12+r13] - xor edx,eax - mov edi,esi - xor edi,eax - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - and ecx,edi - add ebp,DWORD[32+r13] - xor ecx,esi - mov edi,edx - xor edi,esi - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - and ebx,edi - add eax,DWORD[36+r13] - xor ebx,edx - mov edi,ecx - xor edi,edx - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - and ebp,edi - add esi,DWORD[40+r13] - xor ebp,ecx - mov edi,ebx - xor edi,ecx - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - and eax,edi - vpalignr ymm4,ymm1,ymm0,8 - add edx,DWORD[44+r13] - xor eax,ebx - mov edi,ebp - xor edi,ebx - vpsrldq ymm8,ymm3,4 - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - vpxor ymm4,ymm4,ymm0 - vpxor ymm8,ymm8,ymm2 - xor esi,ebp - add edx,r12d - vpxor ymm4,ymm4,ymm8 - and esi,edi - add ecx,DWORD[64+r13] - xor esi,ebp - mov edi,eax - vpsrld ymm8,ymm4,31 - xor edi,ebp - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - vpslldq ymm10,ymm4,12 - vpaddd ymm4,ymm4,ymm4 - rorx esi,edx,2 - xor edx,eax - vpsrld ymm9,ymm10,30 - vpor ymm4,ymm4,ymm8 - add ecx,r12d - and edx,edi - vpslld ymm10,ymm10,2 - vpxor ymm4,ymm4,ymm9 - add ebx,DWORD[68+r13] - xor edx,eax - vpxor ymm4,ymm4,ymm10 - mov edi,esi - xor edi,eax - lea ebx,[rdx*1+rbx] - vpaddd ymm9,ymm4,ymm11 - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - vmovdqu YMMWORD[128+rsp],ymm9 - add ebx,r12d - and ecx,edi - add ebp,DWORD[72+r13] - xor ecx,esi - mov edi,edx - xor edi,esi - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - and ebx,edi - add eax,DWORD[76+r13] - xor ebx,edx - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - xor ebp,edx - vpalignr ymm5,ymm2,ymm1,8 - add esi,DWORD[96+r13] - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - vpsrldq ymm8,ymm4,4 - xor eax,ebx - add esi,r12d - xor eax,ecx - vpxor ymm5,ymm5,ymm1 - vpxor ymm8,ymm8,ymm3 - add edx,DWORD[100+r13] - lea edx,[rax*1+rdx] - vpxor ymm5,ymm5,ymm8 - rorx r12d,esi,27 - rorx eax,esi,2 - xor esi,ebp - add edx,r12d - vpsrld ymm8,ymm5,31 - vmovdqu ymm11,YMMWORD[((-32))+r14] - xor esi,ebx - add ecx,DWORD[104+r13] - lea ecx,[rsi*1+rcx] - vpslldq ymm10,ymm5,12 - vpaddd ymm5,ymm5,ymm5 - rorx r12d,edx,27 - rorx esi,edx,2 - vpsrld ymm9,ymm10,30 - vpor ymm5,ymm5,ymm8 - xor edx,eax - add ecx,r12d - vpslld ymm10,ymm10,2 - vpxor ymm5,ymm5,ymm9 - xor edx,ebp - add ebx,DWORD[108+r13] - lea r13,[256+r13] - vpxor ymm5,ymm5,ymm10 - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - vpaddd ymm9,ymm5,ymm11 - xor ecx,esi - add ebx,r12d - xor ecx,eax - vmovdqu YMMWORD[160+rsp],ymm9 - add ebp,DWORD[((-128))+r13] - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - xor ebx,esi - vpalignr ymm6,ymm3,ymm2,8 - add eax,DWORD[((-124))+r13] - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - vpsrldq ymm8,ymm5,4 - xor ebp,ecx - add eax,r12d - xor ebp,edx - vpxor ymm6,ymm6,ymm2 - vpxor ymm8,ymm8,ymm4 - add esi,DWORD[((-120))+r13] - lea esi,[rbp*1+rsi] - vpxor ymm6,ymm6,ymm8 - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - vpsrld ymm8,ymm6,31 - xor eax,ecx - add edx,DWORD[((-116))+r13] - lea edx,[rax*1+rdx] - vpslldq ymm10,ymm6,12 - vpaddd ymm6,ymm6,ymm6 - rorx r12d,esi,27 - rorx eax,esi,2 - vpsrld ymm9,ymm10,30 - vpor ymm6,ymm6,ymm8 - xor esi,ebp - add edx,r12d - vpslld ymm10,ymm10,2 - vpxor ymm6,ymm6,ymm9 - xor esi,ebx - add ecx,DWORD[((-96))+r13] - vpxor ymm6,ymm6,ymm10 - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - vpaddd ymm9,ymm6,ymm11 - xor edx,eax - add ecx,r12d - xor edx,ebp - vmovdqu YMMWORD[192+rsp],ymm9 - add ebx,DWORD[((-92))+r13] - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - xor ecx,eax - vpalignr ymm7,ymm4,ymm3,8 - add ebp,DWORD[((-88))+r13] - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - vpsrldq ymm8,ymm6,4 - xor ebx,edx - add ebp,r12d - xor ebx,esi - vpxor ymm7,ymm7,ymm3 - vpxor ymm8,ymm8,ymm5 - add eax,DWORD[((-84))+r13] - lea eax,[rbx*1+rax] - vpxor ymm7,ymm7,ymm8 - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - vpsrld ymm8,ymm7,31 - xor ebp,edx - add esi,DWORD[((-64))+r13] - lea esi,[rbp*1+rsi] - vpslldq ymm10,ymm7,12 - vpaddd ymm7,ymm7,ymm7 - rorx r12d,eax,27 - rorx ebp,eax,2 - vpsrld ymm9,ymm10,30 - vpor ymm7,ymm7,ymm8 - xor eax,ebx - add esi,r12d - vpslld ymm10,ymm10,2 - vpxor ymm7,ymm7,ymm9 - xor eax,ecx - add edx,DWORD[((-60))+r13] - vpxor ymm7,ymm7,ymm10 - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - rorx eax,esi,2 - vpaddd ymm9,ymm7,ymm11 - xor esi,ebp - add edx,r12d - xor esi,ebx - vmovdqu YMMWORD[224+rsp],ymm9 - add ecx,DWORD[((-56))+r13] - lea ecx,[rsi*1+rcx] - rorx r12d,edx,27 - rorx esi,edx,2 - xor edx,eax - add ecx,r12d - xor edx,ebp - add ebx,DWORD[((-52))+r13] - lea ebx,[rdx*1+rbx] - rorx r12d,ecx,27 - rorx edx,ecx,2 - xor ecx,esi - add ebx,r12d - xor ecx,eax - add ebp,DWORD[((-32))+r13] - lea ebp,[rbp*1+rcx] - rorx r12d,ebx,27 - rorx ecx,ebx,2 - xor ebx,edx - add ebp,r12d - xor ebx,esi - add eax,DWORD[((-28))+r13] - lea eax,[rbx*1+rax] - rorx r12d,ebp,27 - rorx ebx,ebp,2 - xor ebp,ecx - add eax,r12d - xor ebp,edx - add esi,DWORD[((-24))+r13] - lea esi,[rbp*1+rsi] - rorx r12d,eax,27 - rorx ebp,eax,2 - xor eax,ebx - add esi,r12d - xor eax,ecx - add edx,DWORD[((-20))+r13] - lea edx,[rax*1+rdx] - rorx r12d,esi,27 - add edx,r12d - lea r13,[128+rsp] - - - add edx,DWORD[r8] - add esi,DWORD[4+r8] - add ebp,DWORD[8+r8] - mov DWORD[r8],edx - add ebx,DWORD[12+r8] - mov DWORD[4+r8],esi - mov eax,edx - add ecx,DWORD[16+r8] - mov r12d,ebp - mov DWORD[8+r8],ebp - mov edx,ebx - - mov DWORD[12+r8],ebx - mov ebp,esi - mov DWORD[16+r8],ecx - - mov esi,ecx - mov ecx,r12d - - - cmp r9,r10 - jbe NEAR $L$oop_avx2 - -$L$done_avx2: - vzeroupper - movaps xmm6,XMMWORD[((-40-96))+r11] - movaps xmm7,XMMWORD[((-40-80))+r11] - movaps xmm8,XMMWORD[((-40-64))+r11] - movaps xmm9,XMMWORD[((-40-48))+r11] - movaps xmm10,XMMWORD[((-40-32))+r11] - movaps xmm11,XMMWORD[((-40-16))+r11] - mov r14,QWORD[((-40))+r11] - - mov r13,QWORD[((-32))+r11] - - mov r12,QWORD[((-24))+r11] - - mov rbp,QWORD[((-16))+r11] - - mov rbx,QWORD[((-8))+r11] - - lea rsp,[r11] - -$L$epilogue_avx2: - mov rdi,QWORD[8+rsp] ;WIN64 epilogue - mov rsi,QWORD[16+rsp] - DB 0F3h,0C3h ;repret - -$L$SEH_end_sha1_block_data_order_avx2: ALIGN 64 K_XX_XX: DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 @@ -5617,38 +3702,6 @@ se_handler: ALIGN 16 -shaext_handler: - push rsi - push rdi - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - pushfq - sub rsp,64 - - mov rax,QWORD[120+r8] - mov rbx,QWORD[248+r8] - - lea r10,[$L$prologue_shaext] - cmp rbx,r10 - jb NEAR $L$common_seh_tail - - lea r10,[$L$epilogue_shaext] - cmp rbx,r10 - jae NEAR $L$common_seh_tail - - lea rsi,[((-8-64))+rax] - lea rdi,[512+r8] - mov ecx,8 - DD 0xa548f3fc - - jmp NEAR $L$common_seh_tail - - -ALIGN 16 ssse3_handler: push rsi push rdi @@ -5740,26 +3793,17 @@ ALIGN 4 DD $L$SEH_begin_sha1_block_data_order wrt ..imagebase DD $L$SEH_end_sha1_block_data_order wrt ..imagebase DD $L$SEH_info_sha1_block_data_order wrt ..imagebase - DD $L$SEH_begin_sha1_block_data_order_shaext wrt ..imagebase - DD $L$SEH_end_sha1_block_data_order_shaext wrt ..imagebase - DD $L$SEH_info_sha1_block_data_order_shaext wrt ..imagebase DD $L$SEH_begin_sha1_block_data_order_ssse3 wrt ..imagebase DD $L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase DD $L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase DD $L$SEH_begin_sha1_block_data_order_avx wrt ..imagebase DD $L$SEH_end_sha1_block_data_order_avx wrt ..imagebase DD $L$SEH_info_sha1_block_data_order_avx wrt ..imagebase - DD $L$SEH_begin_sha1_block_data_order_avx2 wrt ..imagebase - DD $L$SEH_end_sha1_block_data_order_avx2 wrt ..imagebase - DD $L$SEH_info_sha1_block_data_order_avx2 wrt ..imagebase section .xdata rdata align=8 ALIGN 8 $L$SEH_info_sha1_block_data_order: DB 9,0,0,0 DD se_handler wrt ..imagebase -$L$SEH_info_sha1_block_data_order_shaext: -DB 9,0,0,0 - DD shaext_handler wrt ..imagebase $L$SEH_info_sha1_block_data_order_ssse3: DB 9,0,0,0 DD ssse3_handler wrt ..imagebase @@ -5768,7 +3812,3 @@ $L$SEH_info_sha1_block_data_order_avx: DB 9,0,0,0 DD ssse3_handler wrt ..imagebase DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase -$L$SEH_info_sha1_block_data_order_avx2: -DB 9,0,0,0 - DD ssse3_handler wrt ..imagebase - DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase |