diff options
author | Robert Sloan <varomodt@google.com> | 2017-01-09 10:53:07 -0800 |
---|---|---|
committer | Robert Sloan <varomodt@google.com> | 2017-01-09 15:18:23 -0800 |
commit | 69939df2891f62f7f00ff2ac275f1cd81a67454c (patch) | |
tree | 467d0f510b31deef87dc70b119ea143090cc67a6 /win-x86_64 | |
parent | e7531f038363d24a103c820cff38898455ff66fe (diff) | |
download | boringssl-69939df2891f62f7f00ff2ac275f1cd81a67454c.tar.gz |
external/boringssl: Sync to 9c33ae85621ef8e00a42309b5101e0bedd02b816.android-n-mr2-preview-1
This includes the following changes:
https://boringssl.googlesource.com/boringssl/+log/629db8cd0c84628e37aa81242b5b07fec7602f55..9c33ae85621ef8e00a42309b5101e0bedd02b816
Bug: 33622440
Test: BoringSSL tests
Change-Id: I20da15ad995a620b6b2f08db20c77ebd0f05ca10
Diffstat (limited to 'win-x86_64')
-rw-r--r-- | win-x86_64/crypto/bn/rsaz-x86_64.asm | 1495 |
1 files changed, 0 insertions, 1495 deletions
diff --git a/win-x86_64/crypto/bn/rsaz-x86_64.asm b/win-x86_64/crypto/bn/rsaz-x86_64.asm deleted file mode 100644 index 72ec5052..00000000 --- a/win-x86_64/crypto/bn/rsaz-x86_64.asm +++ /dev/null @@ -1,1495 +0,0 @@ -default rel -%define XMMWORD -%define YMMWORD -%define ZMMWORD -section .text code align=64 - - -EXTERN OPENSSL_ia32cap_P - -global rsaz_512_sqr - -ALIGN 32 -rsaz_512_sqr: - mov QWORD[8+rsp],rdi ;WIN64 prologue - mov QWORD[16+rsp],rsi - mov rax,rsp -$L$SEH_begin_rsaz_512_sqr: - mov rdi,rcx - mov rsi,rdx - mov rdx,r8 - mov rcx,r9 - mov r8,QWORD[40+rsp] - - - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - - sub rsp,128+24 -$L$sqr_body: - mov rbp,rdx - mov rdx,QWORD[rsi] - mov rax,QWORD[8+rsi] - mov QWORD[128+rsp],rcx - jmp NEAR $L$oop_sqr - -ALIGN 32 -$L$oop_sqr: - mov DWORD[((128+8))+rsp],r8d - - mov rbx,rdx - mul rdx - mov r8,rax - mov rax,QWORD[16+rsi] - mov r9,rdx - - mul rbx - add r9,rax - mov rax,QWORD[24+rsi] - mov r10,rdx - adc r10,0 - - mul rbx - add r10,rax - mov rax,QWORD[32+rsi] - mov r11,rdx - adc r11,0 - - mul rbx - add r11,rax - mov rax,QWORD[40+rsi] - mov r12,rdx - adc r12,0 - - mul rbx - add r12,rax - mov rax,QWORD[48+rsi] - mov r13,rdx - adc r13,0 - - mul rbx - add r13,rax - mov rax,QWORD[56+rsi] - mov r14,rdx - adc r14,0 - - mul rbx - add r14,rax - mov rax,rbx - mov r15,rdx - adc r15,0 - - add r8,r8 - mov rcx,r9 - adc r9,r9 - - mul rax - mov QWORD[rsp],rax - add r8,rdx - adc r9,0 - - mov QWORD[8+rsp],r8 - shr rcx,63 - - - mov r8,QWORD[8+rsi] - mov rax,QWORD[16+rsi] - mul r8 - add r10,rax - mov rax,QWORD[24+rsi] - mov rbx,rdx - adc rbx,0 - - mul r8 - add r11,rax - mov rax,QWORD[32+rsi] - adc rdx,0 - add r11,rbx - mov rbx,rdx - adc rbx,0 - - mul r8 - add r12,rax - mov rax,QWORD[40+rsi] - adc rdx,0 - add r12,rbx - mov rbx,rdx - adc rbx,0 - - mul r8 - add r13,rax - mov rax,QWORD[48+rsi] - adc rdx,0 - add r13,rbx - mov rbx,rdx - adc rbx,0 - - mul r8 - add r14,rax - mov rax,QWORD[56+rsi] - adc rdx,0 - add r14,rbx - mov rbx,rdx - adc rbx,0 - - mul r8 - add r15,rax - mov rax,r8 - adc rdx,0 - add r15,rbx - mov r8,rdx - mov rdx,r10 - adc r8,0 - - add rdx,rdx - lea r10,[r10*2+rcx] - mov rbx,r11 - adc r11,r11 - - mul rax - add r9,rax - adc r10,rdx - adc r11,0 - - mov QWORD[16+rsp],r9 - mov QWORD[24+rsp],r10 - shr rbx,63 - - - mov r9,QWORD[16+rsi] - mov rax,QWORD[24+rsi] - mul r9 - add r12,rax - mov rax,QWORD[32+rsi] - mov rcx,rdx - adc rcx,0 - - mul r9 - add r13,rax - mov rax,QWORD[40+rsi] - adc rdx,0 - add r13,rcx - mov rcx,rdx - adc rcx,0 - - mul r9 - add r14,rax - mov rax,QWORD[48+rsi] - adc rdx,0 - add r14,rcx - mov rcx,rdx - adc rcx,0 - - mul r9 - mov r10,r12 - lea r12,[r12*2+rbx] - add r15,rax - mov rax,QWORD[56+rsi] - adc rdx,0 - add r15,rcx - mov rcx,rdx - adc rcx,0 - - mul r9 - shr r10,63 - add r8,rax - mov rax,r9 - adc rdx,0 - add r8,rcx - mov r9,rdx - adc r9,0 - - mov rcx,r13 - lea r13,[r13*2+r10] - - mul rax - add r11,rax - adc r12,rdx - adc r13,0 - - mov QWORD[32+rsp],r11 - mov QWORD[40+rsp],r12 - shr rcx,63 - - - mov r10,QWORD[24+rsi] - mov rax,QWORD[32+rsi] - mul r10 - add r14,rax - mov rax,QWORD[40+rsi] - mov rbx,rdx - adc rbx,0 - - mul r10 - add r15,rax - mov rax,QWORD[48+rsi] - adc rdx,0 - add r15,rbx - mov rbx,rdx - adc rbx,0 - - mul r10 - mov r12,r14 - lea r14,[r14*2+rcx] - add r8,rax - mov rax,QWORD[56+rsi] - adc rdx,0 - add r8,rbx - mov rbx,rdx - adc rbx,0 - - mul r10 - shr r12,63 - add r9,rax - mov rax,r10 - adc rdx,0 - add r9,rbx - mov r10,rdx - adc r10,0 - - mov rbx,r15 - lea r15,[r15*2+r12] - - mul rax - add r13,rax - adc r14,rdx - adc r15,0 - - mov QWORD[48+rsp],r13 - mov QWORD[56+rsp],r14 - shr rbx,63 - - - mov r11,QWORD[32+rsi] - mov rax,QWORD[40+rsi] - mul r11 - add r8,rax - mov rax,QWORD[48+rsi] - mov rcx,rdx - adc rcx,0 - - mul r11 - add r9,rax - mov rax,QWORD[56+rsi] - adc rdx,0 - mov r12,r8 - lea r8,[r8*2+rbx] - add r9,rcx - mov rcx,rdx - adc rcx,0 - - mul r11 - shr r12,63 - add r10,rax - mov rax,r11 - adc rdx,0 - add r10,rcx - mov r11,rdx - adc r11,0 - - mov rcx,r9 - lea r9,[r9*2+r12] - - mul rax - add r15,rax - adc r8,rdx - adc r9,0 - - mov QWORD[64+rsp],r15 - mov QWORD[72+rsp],r8 - shr rcx,63 - - - mov r12,QWORD[40+rsi] - mov rax,QWORD[48+rsi] - mul r12 - add r10,rax - mov rax,QWORD[56+rsi] - mov rbx,rdx - adc rbx,0 - - mul r12 - add r11,rax - mov rax,r12 - mov r15,r10 - lea r10,[r10*2+rcx] - adc rdx,0 - shr r15,63 - add r11,rbx - mov r12,rdx - adc r12,0 - - mov rbx,r11 - lea r11,[r11*2+r15] - - mul rax - add r9,rax - adc r10,rdx - adc r11,0 - - mov QWORD[80+rsp],r9 - mov QWORD[88+rsp],r10 - - - mov r13,QWORD[48+rsi] - mov rax,QWORD[56+rsi] - mul r13 - add r12,rax - mov rax,r13 - mov r13,rdx - adc r13,0 - - xor r14,r14 - shl rbx,1 - adc r12,r12 - adc r13,r13 - adc r14,r14 - - mul rax - add r11,rax - adc r12,rdx - adc r13,0 - - mov QWORD[96+rsp],r11 - mov QWORD[104+rsp],r12 - - - mov rax,QWORD[56+rsi] - mul rax - add r13,rax - adc rdx,0 - - add r14,rdx - - mov QWORD[112+rsp],r13 - mov QWORD[120+rsp],r14 - - mov r8,QWORD[rsp] - mov r9,QWORD[8+rsp] - mov r10,QWORD[16+rsp] - mov r11,QWORD[24+rsp] - mov r12,QWORD[32+rsp] - mov r13,QWORD[40+rsp] - mov r14,QWORD[48+rsp] - mov r15,QWORD[56+rsp] - - call __rsaz_512_reduce - - add r8,QWORD[64+rsp] - adc r9,QWORD[72+rsp] - adc r10,QWORD[80+rsp] - adc r11,QWORD[88+rsp] - adc r12,QWORD[96+rsp] - adc r13,QWORD[104+rsp] - adc r14,QWORD[112+rsp] - adc r15,QWORD[120+rsp] - sbb rcx,rcx - - call __rsaz_512_subtract - - mov rdx,r8 - mov rax,r9 - mov r8d,DWORD[((128+8))+rsp] - mov rsi,rdi - - dec r8d - jnz NEAR $L$oop_sqr - - lea rax,[((128+24+48))+rsp] - mov r15,QWORD[((-48))+rax] - mov r14,QWORD[((-40))+rax] - mov r13,QWORD[((-32))+rax] - mov r12,QWORD[((-24))+rax] - mov rbp,QWORD[((-16))+rax] - mov rbx,QWORD[((-8))+rax] - lea rsp,[rax] -$L$sqr_epilogue: - mov rdi,QWORD[8+rsp] ;WIN64 epilogue - mov rsi,QWORD[16+rsp] - DB 0F3h,0C3h ;repret -$L$SEH_end_rsaz_512_sqr: -global rsaz_512_mul - -ALIGN 32 -rsaz_512_mul: - mov QWORD[8+rsp],rdi ;WIN64 prologue - mov QWORD[16+rsp],rsi - mov rax,rsp -$L$SEH_begin_rsaz_512_mul: - mov rdi,rcx - mov rsi,rdx - mov rdx,r8 - mov rcx,r9 - mov r8,QWORD[40+rsp] - - - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - - sub rsp,128+24 -$L$mul_body: -DB 102,72,15,110,199 -DB 102,72,15,110,201 - mov QWORD[128+rsp],r8 - mov rbx,QWORD[rdx] - mov rbp,rdx - call __rsaz_512_mul - -DB 102,72,15,126,199 -DB 102,72,15,126,205 - - mov r8,QWORD[rsp] - mov r9,QWORD[8+rsp] - mov r10,QWORD[16+rsp] - mov r11,QWORD[24+rsp] - mov r12,QWORD[32+rsp] - mov r13,QWORD[40+rsp] - mov r14,QWORD[48+rsp] - mov r15,QWORD[56+rsp] - - call __rsaz_512_reduce - add r8,QWORD[64+rsp] - adc r9,QWORD[72+rsp] - adc r10,QWORD[80+rsp] - adc r11,QWORD[88+rsp] - adc r12,QWORD[96+rsp] - adc r13,QWORD[104+rsp] - adc r14,QWORD[112+rsp] - adc r15,QWORD[120+rsp] - sbb rcx,rcx - - call __rsaz_512_subtract - - lea rax,[((128+24+48))+rsp] - mov r15,QWORD[((-48))+rax] - mov r14,QWORD[((-40))+rax] - mov r13,QWORD[((-32))+rax] - mov r12,QWORD[((-24))+rax] - mov rbp,QWORD[((-16))+rax] - mov rbx,QWORD[((-8))+rax] - lea rsp,[rax] -$L$mul_epilogue: - mov rdi,QWORD[8+rsp] ;WIN64 epilogue - mov rsi,QWORD[16+rsp] - DB 0F3h,0C3h ;repret -$L$SEH_end_rsaz_512_mul: -global rsaz_512_mul_gather4 - -ALIGN 32 -rsaz_512_mul_gather4: - mov QWORD[8+rsp],rdi ;WIN64 prologue - mov QWORD[16+rsp],rsi - mov rax,rsp -$L$SEH_begin_rsaz_512_mul_gather4: - mov rdi,rcx - mov rsi,rdx - mov rdx,r8 - mov rcx,r9 - mov r8,QWORD[40+rsp] - mov r9,QWORD[48+rsp] - - - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - - sub rsp,328 - movaps XMMWORD[160+rsp],xmm6 - movaps XMMWORD[176+rsp],xmm7 - movaps XMMWORD[192+rsp],xmm8 - movaps XMMWORD[208+rsp],xmm9 - movaps XMMWORD[224+rsp],xmm10 - movaps XMMWORD[240+rsp],xmm11 - movaps XMMWORD[256+rsp],xmm12 - movaps XMMWORD[272+rsp],xmm13 - movaps XMMWORD[288+rsp],xmm14 - movaps XMMWORD[304+rsp],xmm15 -$L$mul_gather4_body: - movd xmm8,r9d - movdqa xmm1,XMMWORD[(($L$inc+16))] - movdqa xmm0,XMMWORD[$L$inc] - - pshufd xmm8,xmm8,0 - movdqa xmm7,xmm1 - movdqa xmm2,xmm1 - paddd xmm1,xmm0 - pcmpeqd xmm0,xmm8 - movdqa xmm3,xmm7 - paddd xmm2,xmm1 - pcmpeqd xmm1,xmm8 - movdqa xmm4,xmm7 - paddd xmm3,xmm2 - pcmpeqd xmm2,xmm8 - movdqa xmm5,xmm7 - paddd xmm4,xmm3 - pcmpeqd xmm3,xmm8 - movdqa xmm6,xmm7 - paddd xmm5,xmm4 - pcmpeqd xmm4,xmm8 - paddd xmm6,xmm5 - pcmpeqd xmm5,xmm8 - paddd xmm7,xmm6 - pcmpeqd xmm6,xmm8 - pcmpeqd xmm7,xmm8 - - movdqa xmm8,XMMWORD[rdx] - movdqa xmm9,XMMWORD[16+rdx] - movdqa xmm10,XMMWORD[32+rdx] - movdqa xmm11,XMMWORD[48+rdx] - pand xmm8,xmm0 - movdqa xmm12,XMMWORD[64+rdx] - pand xmm9,xmm1 - movdqa xmm13,XMMWORD[80+rdx] - pand xmm10,xmm2 - movdqa xmm14,XMMWORD[96+rdx] - pand xmm11,xmm3 - movdqa xmm15,XMMWORD[112+rdx] - lea rbp,[128+rdx] - pand xmm12,xmm4 - pand xmm13,xmm5 - pand xmm14,xmm6 - pand xmm15,xmm7 - por xmm8,xmm10 - por xmm9,xmm11 - por xmm8,xmm12 - por xmm9,xmm13 - por xmm8,xmm14 - por xmm9,xmm15 - - por xmm8,xmm9 - pshufd xmm9,xmm8,0x4e - por xmm8,xmm9 -DB 102,76,15,126,195 - - mov QWORD[128+rsp],r8 - mov QWORD[((128+8))+rsp],rdi - mov QWORD[((128+16))+rsp],rcx - - mov rax,QWORD[rsi] - mov rcx,QWORD[8+rsi] - mul rbx - mov QWORD[rsp],rax - mov rax,rcx - mov r8,rdx - - mul rbx - add r8,rax - mov rax,QWORD[16+rsi] - mov r9,rdx - adc r9,0 - - mul rbx - add r9,rax - mov rax,QWORD[24+rsi] - mov r10,rdx - adc r10,0 - - mul rbx - add r10,rax - mov rax,QWORD[32+rsi] - mov r11,rdx - adc r11,0 - - mul rbx - add r11,rax - mov rax,QWORD[40+rsi] - mov r12,rdx - adc r12,0 - - mul rbx - add r12,rax - mov rax,QWORD[48+rsi] - mov r13,rdx - adc r13,0 - - mul rbx - add r13,rax - mov rax,QWORD[56+rsi] - mov r14,rdx - adc r14,0 - - mul rbx - add r14,rax - mov rax,QWORD[rsi] - mov r15,rdx - adc r15,0 - - lea rdi,[8+rsp] - mov ecx,7 - jmp NEAR $L$oop_mul_gather - -ALIGN 32 -$L$oop_mul_gather: - movdqa xmm8,XMMWORD[rbp] - movdqa xmm9,XMMWORD[16+rbp] - movdqa xmm10,XMMWORD[32+rbp] - movdqa xmm11,XMMWORD[48+rbp] - pand xmm8,xmm0 - movdqa xmm12,XMMWORD[64+rbp] - pand xmm9,xmm1 - movdqa xmm13,XMMWORD[80+rbp] - pand xmm10,xmm2 - movdqa xmm14,XMMWORD[96+rbp] - pand xmm11,xmm3 - movdqa xmm15,XMMWORD[112+rbp] - lea rbp,[128+rbp] - pand xmm12,xmm4 - pand xmm13,xmm5 - pand xmm14,xmm6 - pand xmm15,xmm7 - por xmm8,xmm10 - por xmm9,xmm11 - por xmm8,xmm12 - por xmm9,xmm13 - por xmm8,xmm14 - por xmm9,xmm15 - - por xmm8,xmm9 - pshufd xmm9,xmm8,0x4e - por xmm8,xmm9 -DB 102,76,15,126,195 - - mul rbx - add r8,rax - mov rax,QWORD[8+rsi] - mov QWORD[rdi],r8 - mov r8,rdx - adc r8,0 - - mul rbx - add r9,rax - mov rax,QWORD[16+rsi] - adc rdx,0 - add r8,r9 - mov r9,rdx - adc r9,0 - - mul rbx - add r10,rax - mov rax,QWORD[24+rsi] - adc rdx,0 - add r9,r10 - mov r10,rdx - adc r10,0 - - mul rbx - add r11,rax - mov rax,QWORD[32+rsi] - adc rdx,0 - add r10,r11 - mov r11,rdx - adc r11,0 - - mul rbx - add r12,rax - mov rax,QWORD[40+rsi] - adc rdx,0 - add r11,r12 - mov r12,rdx - adc r12,0 - - mul rbx - add r13,rax - mov rax,QWORD[48+rsi] - adc rdx,0 - add r12,r13 - mov r13,rdx - adc r13,0 - - mul rbx - add r14,rax - mov rax,QWORD[56+rsi] - adc rdx,0 - add r13,r14 - mov r14,rdx - adc r14,0 - - mul rbx - add r15,rax - mov rax,QWORD[rsi] - adc rdx,0 - add r14,r15 - mov r15,rdx - adc r15,0 - - lea rdi,[8+rdi] - - dec ecx - jnz NEAR $L$oop_mul_gather - - mov QWORD[rdi],r8 - mov QWORD[8+rdi],r9 - mov QWORD[16+rdi],r10 - mov QWORD[24+rdi],r11 - mov QWORD[32+rdi],r12 - mov QWORD[40+rdi],r13 - mov QWORD[48+rdi],r14 - mov QWORD[56+rdi],r15 - - mov rdi,QWORD[((128+8))+rsp] - mov rbp,QWORD[((128+16))+rsp] - - mov r8,QWORD[rsp] - mov r9,QWORD[8+rsp] - mov r10,QWORD[16+rsp] - mov r11,QWORD[24+rsp] - mov r12,QWORD[32+rsp] - mov r13,QWORD[40+rsp] - mov r14,QWORD[48+rsp] - mov r15,QWORD[56+rsp] - - call __rsaz_512_reduce - add r8,QWORD[64+rsp] - adc r9,QWORD[72+rsp] - adc r10,QWORD[80+rsp] - adc r11,QWORD[88+rsp] - adc r12,QWORD[96+rsp] - adc r13,QWORD[104+rsp] - adc r14,QWORD[112+rsp] - adc r15,QWORD[120+rsp] - sbb rcx,rcx - - call __rsaz_512_subtract - - lea rax,[((128+24+48))+rsp] - movaps xmm6,XMMWORD[((160-200))+rax] - movaps xmm7,XMMWORD[((176-200))+rax] - movaps xmm8,XMMWORD[((192-200))+rax] - movaps xmm9,XMMWORD[((208-200))+rax] - movaps xmm10,XMMWORD[((224-200))+rax] - movaps xmm11,XMMWORD[((240-200))+rax] - movaps xmm12,XMMWORD[((256-200))+rax] - movaps xmm13,XMMWORD[((272-200))+rax] - movaps xmm14,XMMWORD[((288-200))+rax] - movaps xmm15,XMMWORD[((304-200))+rax] - lea rax,[176+rax] - mov r15,QWORD[((-48))+rax] - mov r14,QWORD[((-40))+rax] - mov r13,QWORD[((-32))+rax] - mov r12,QWORD[((-24))+rax] - mov rbp,QWORD[((-16))+rax] - mov rbx,QWORD[((-8))+rax] - lea rsp,[rax] -$L$mul_gather4_epilogue: - mov rdi,QWORD[8+rsp] ;WIN64 epilogue - mov rsi,QWORD[16+rsp] - DB 0F3h,0C3h ;repret -$L$SEH_end_rsaz_512_mul_gather4: -global rsaz_512_mul_scatter4 - -ALIGN 32 -rsaz_512_mul_scatter4: - mov QWORD[8+rsp],rdi ;WIN64 prologue - mov QWORD[16+rsp],rsi - mov rax,rsp -$L$SEH_begin_rsaz_512_mul_scatter4: - mov rdi,rcx - mov rsi,rdx - mov rdx,r8 - mov rcx,r9 - mov r8,QWORD[40+rsp] - mov r9,QWORD[48+rsp] - - - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - - mov r9d,r9d - sub rsp,128+24 -$L$mul_scatter4_body: - lea r8,[r9*8+r8] -DB 102,72,15,110,199 -DB 102,72,15,110,202 -DB 102,73,15,110,208 - mov QWORD[128+rsp],rcx - - mov rbp,rdi - mov rbx,QWORD[rdi] - call __rsaz_512_mul - -DB 102,72,15,126,199 -DB 102,72,15,126,205 - - mov r8,QWORD[rsp] - mov r9,QWORD[8+rsp] - mov r10,QWORD[16+rsp] - mov r11,QWORD[24+rsp] - mov r12,QWORD[32+rsp] - mov r13,QWORD[40+rsp] - mov r14,QWORD[48+rsp] - mov r15,QWORD[56+rsp] - - call __rsaz_512_reduce - add r8,QWORD[64+rsp] - adc r9,QWORD[72+rsp] - adc r10,QWORD[80+rsp] - adc r11,QWORD[88+rsp] - adc r12,QWORD[96+rsp] - adc r13,QWORD[104+rsp] - adc r14,QWORD[112+rsp] - adc r15,QWORD[120+rsp] -DB 102,72,15,126,214 - sbb rcx,rcx - - call __rsaz_512_subtract - - mov QWORD[rsi],r8 - mov QWORD[128+rsi],r9 - mov QWORD[256+rsi],r10 - mov QWORD[384+rsi],r11 - mov QWORD[512+rsi],r12 - mov QWORD[640+rsi],r13 - mov QWORD[768+rsi],r14 - mov QWORD[896+rsi],r15 - - lea rax,[((128+24+48))+rsp] - mov r15,QWORD[((-48))+rax] - mov r14,QWORD[((-40))+rax] - mov r13,QWORD[((-32))+rax] - mov r12,QWORD[((-24))+rax] - mov rbp,QWORD[((-16))+rax] - mov rbx,QWORD[((-8))+rax] - lea rsp,[rax] -$L$mul_scatter4_epilogue: - mov rdi,QWORD[8+rsp] ;WIN64 epilogue - mov rsi,QWORD[16+rsp] - DB 0F3h,0C3h ;repret -$L$SEH_end_rsaz_512_mul_scatter4: -global rsaz_512_mul_by_one - -ALIGN 32 -rsaz_512_mul_by_one: - mov QWORD[8+rsp],rdi ;WIN64 prologue - mov QWORD[16+rsp],rsi - mov rax,rsp -$L$SEH_begin_rsaz_512_mul_by_one: - mov rdi,rcx - mov rsi,rdx - mov rdx,r8 - mov rcx,r9 - - - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - - sub rsp,128+24 -$L$mul_by_one_body: - mov rbp,rdx - mov QWORD[128+rsp],rcx - - mov r8,QWORD[rsi] - pxor xmm0,xmm0 - mov r9,QWORD[8+rsi] - mov r10,QWORD[16+rsi] - mov r11,QWORD[24+rsi] - mov r12,QWORD[32+rsi] - mov r13,QWORD[40+rsi] - mov r14,QWORD[48+rsi] - mov r15,QWORD[56+rsi] - - movdqa XMMWORD[rsp],xmm0 - movdqa XMMWORD[16+rsp],xmm0 - movdqa XMMWORD[32+rsp],xmm0 - movdqa XMMWORD[48+rsp],xmm0 - movdqa XMMWORD[64+rsp],xmm0 - movdqa XMMWORD[80+rsp],xmm0 - movdqa XMMWORD[96+rsp],xmm0 - call __rsaz_512_reduce - mov QWORD[rdi],r8 - mov QWORD[8+rdi],r9 - mov QWORD[16+rdi],r10 - mov QWORD[24+rdi],r11 - mov QWORD[32+rdi],r12 - mov QWORD[40+rdi],r13 - mov QWORD[48+rdi],r14 - mov QWORD[56+rdi],r15 - - lea rax,[((128+24+48))+rsp] - mov r15,QWORD[((-48))+rax] - mov r14,QWORD[((-40))+rax] - mov r13,QWORD[((-32))+rax] - mov r12,QWORD[((-24))+rax] - mov rbp,QWORD[((-16))+rax] - mov rbx,QWORD[((-8))+rax] - lea rsp,[rax] -$L$mul_by_one_epilogue: - mov rdi,QWORD[8+rsp] ;WIN64 epilogue - mov rsi,QWORD[16+rsp] - DB 0F3h,0C3h ;repret -$L$SEH_end_rsaz_512_mul_by_one: - -ALIGN 32 -__rsaz_512_reduce: - mov rbx,r8 - imul rbx,QWORD[((128+8))+rsp] - mov rax,QWORD[rbp] - mov ecx,8 - jmp NEAR $L$reduction_loop - -ALIGN 32 -$L$reduction_loop: - mul rbx - mov rax,QWORD[8+rbp] - neg r8 - mov r8,rdx - adc r8,0 - - mul rbx - add r9,rax - mov rax,QWORD[16+rbp] - adc rdx,0 - add r8,r9 - mov r9,rdx - adc r9,0 - - mul rbx - add r10,rax - mov rax,QWORD[24+rbp] - adc rdx,0 - add r9,r10 - mov r10,rdx - adc r10,0 - - mul rbx - add r11,rax - mov rax,QWORD[32+rbp] - adc rdx,0 - add r10,r11 - mov rsi,QWORD[((128+8))+rsp] - - - adc rdx,0 - mov r11,rdx - - mul rbx - add r12,rax - mov rax,QWORD[40+rbp] - adc rdx,0 - imul rsi,r8 - add r11,r12 - mov r12,rdx - adc r12,0 - - mul rbx - add r13,rax - mov rax,QWORD[48+rbp] - adc rdx,0 - add r12,r13 - mov r13,rdx - adc r13,0 - - mul rbx - add r14,rax - mov rax,QWORD[56+rbp] - adc rdx,0 - add r13,r14 - mov r14,rdx - adc r14,0 - - mul rbx - mov rbx,rsi - add r15,rax - mov rax,QWORD[rbp] - adc rdx,0 - add r14,r15 - mov r15,rdx - adc r15,0 - - dec ecx - jne NEAR $L$reduction_loop - - DB 0F3h,0C3h ;repret - - -ALIGN 32 -__rsaz_512_subtract: - mov QWORD[rdi],r8 - mov QWORD[8+rdi],r9 - mov QWORD[16+rdi],r10 - mov QWORD[24+rdi],r11 - mov QWORD[32+rdi],r12 - mov QWORD[40+rdi],r13 - mov QWORD[48+rdi],r14 - mov QWORD[56+rdi],r15 - - mov r8,QWORD[rbp] - mov r9,QWORD[8+rbp] - neg r8 - not r9 - and r8,rcx - mov r10,QWORD[16+rbp] - and r9,rcx - not r10 - mov r11,QWORD[24+rbp] - and r10,rcx - not r11 - mov r12,QWORD[32+rbp] - and r11,rcx - not r12 - mov r13,QWORD[40+rbp] - and r12,rcx - not r13 - mov r14,QWORD[48+rbp] - and r13,rcx - not r14 - mov r15,QWORD[56+rbp] - and r14,rcx - not r15 - and r15,rcx - - add r8,QWORD[rdi] - adc r9,QWORD[8+rdi] - adc r10,QWORD[16+rdi] - adc r11,QWORD[24+rdi] - adc r12,QWORD[32+rdi] - adc r13,QWORD[40+rdi] - adc r14,QWORD[48+rdi] - adc r15,QWORD[56+rdi] - - mov QWORD[rdi],r8 - mov QWORD[8+rdi],r9 - mov QWORD[16+rdi],r10 - mov QWORD[24+rdi],r11 - mov QWORD[32+rdi],r12 - mov QWORD[40+rdi],r13 - mov QWORD[48+rdi],r14 - mov QWORD[56+rdi],r15 - - DB 0F3h,0C3h ;repret - - -ALIGN 32 -__rsaz_512_mul: - lea rdi,[8+rsp] - - mov rax,QWORD[rsi] - mul rbx - mov QWORD[rdi],rax - mov rax,QWORD[8+rsi] - mov r8,rdx - - mul rbx - add r8,rax - mov rax,QWORD[16+rsi] - mov r9,rdx - adc r9,0 - - mul rbx - add r9,rax - mov rax,QWORD[24+rsi] - mov r10,rdx - adc r10,0 - - mul rbx - add r10,rax - mov rax,QWORD[32+rsi] - mov r11,rdx - adc r11,0 - - mul rbx - add r11,rax - mov rax,QWORD[40+rsi] - mov r12,rdx - adc r12,0 - - mul rbx - add r12,rax - mov rax,QWORD[48+rsi] - mov r13,rdx - adc r13,0 - - mul rbx - add r13,rax - mov rax,QWORD[56+rsi] - mov r14,rdx - adc r14,0 - - mul rbx - add r14,rax - mov rax,QWORD[rsi] - mov r15,rdx - adc r15,0 - - lea rbp,[8+rbp] - lea rdi,[8+rdi] - - mov ecx,7 - jmp NEAR $L$oop_mul - -ALIGN 32 -$L$oop_mul: - mov rbx,QWORD[rbp] - mul rbx - add r8,rax - mov rax,QWORD[8+rsi] - mov QWORD[rdi],r8 - mov r8,rdx - adc r8,0 - - mul rbx - add r9,rax - mov rax,QWORD[16+rsi] - adc rdx,0 - add r8,r9 - mov r9,rdx - adc r9,0 - - mul rbx - add r10,rax - mov rax,QWORD[24+rsi] - adc rdx,0 - add r9,r10 - mov r10,rdx - adc r10,0 - - mul rbx - add r11,rax - mov rax,QWORD[32+rsi] - adc rdx,0 - add r10,r11 - mov r11,rdx - adc r11,0 - - mul rbx - add r12,rax - mov rax,QWORD[40+rsi] - adc rdx,0 - add r11,r12 - mov r12,rdx - adc r12,0 - - mul rbx - add r13,rax - mov rax,QWORD[48+rsi] - adc rdx,0 - add r12,r13 - mov r13,rdx - adc r13,0 - - mul rbx - add r14,rax - mov rax,QWORD[56+rsi] - adc rdx,0 - add r13,r14 - mov r14,rdx - lea rbp,[8+rbp] - adc r14,0 - - mul rbx - add r15,rax - mov rax,QWORD[rsi] - adc rdx,0 - add r14,r15 - mov r15,rdx - adc r15,0 - - lea rdi,[8+rdi] - - dec ecx - jnz NEAR $L$oop_mul - - mov QWORD[rdi],r8 - mov QWORD[8+rdi],r9 - mov QWORD[16+rdi],r10 - mov QWORD[24+rdi],r11 - mov QWORD[32+rdi],r12 - mov QWORD[40+rdi],r13 - mov QWORD[48+rdi],r14 - mov QWORD[56+rdi],r15 - - DB 0F3h,0C3h ;repret - -global rsaz_512_scatter4 - -ALIGN 16 -rsaz_512_scatter4: - lea rcx,[r8*8+rcx] - mov r9d,8 - jmp NEAR $L$oop_scatter -ALIGN 16 -$L$oop_scatter: - mov rax,QWORD[rdx] - lea rdx,[8+rdx] - mov QWORD[rcx],rax - lea rcx,[128+rcx] - dec r9d - jnz NEAR $L$oop_scatter - DB 0F3h,0C3h ;repret - - -global rsaz_512_gather4 - -ALIGN 16 -rsaz_512_gather4: -$L$SEH_begin_rsaz_512_gather4: -DB 0x48,0x81,0xec,0xa8,0x00,0x00,0x00 -DB 0x0f,0x29,0x34,0x24 -DB 0x0f,0x29,0x7c,0x24,0x10 -DB 0x44,0x0f,0x29,0x44,0x24,0x20 -DB 0x44,0x0f,0x29,0x4c,0x24,0x30 -DB 0x44,0x0f,0x29,0x54,0x24,0x40 -DB 0x44,0x0f,0x29,0x5c,0x24,0x50 -DB 0x44,0x0f,0x29,0x64,0x24,0x60 -DB 0x44,0x0f,0x29,0x6c,0x24,0x70 -DB 0x44,0x0f,0x29,0xb4,0x24,0x80,0,0,0 -DB 0x44,0x0f,0x29,0xbc,0x24,0x90,0,0,0 - movd xmm8,r8d - movdqa xmm1,XMMWORD[(($L$inc+16))] - movdqa xmm0,XMMWORD[$L$inc] - - pshufd xmm8,xmm8,0 - movdqa xmm7,xmm1 - movdqa xmm2,xmm1 - paddd xmm1,xmm0 - pcmpeqd xmm0,xmm8 - movdqa xmm3,xmm7 - paddd xmm2,xmm1 - pcmpeqd xmm1,xmm8 - movdqa xmm4,xmm7 - paddd xmm3,xmm2 - pcmpeqd xmm2,xmm8 - movdqa xmm5,xmm7 - paddd xmm4,xmm3 - pcmpeqd xmm3,xmm8 - movdqa xmm6,xmm7 - paddd xmm5,xmm4 - pcmpeqd xmm4,xmm8 - paddd xmm6,xmm5 - pcmpeqd xmm5,xmm8 - paddd xmm7,xmm6 - pcmpeqd xmm6,xmm8 - pcmpeqd xmm7,xmm8 - mov r9d,8 - jmp NEAR $L$oop_gather -ALIGN 16 -$L$oop_gather: - movdqa xmm8,XMMWORD[rdx] - movdqa xmm9,XMMWORD[16+rdx] - movdqa xmm10,XMMWORD[32+rdx] - movdqa xmm11,XMMWORD[48+rdx] - pand xmm8,xmm0 - movdqa xmm12,XMMWORD[64+rdx] - pand xmm9,xmm1 - movdqa xmm13,XMMWORD[80+rdx] - pand xmm10,xmm2 - movdqa xmm14,XMMWORD[96+rdx] - pand xmm11,xmm3 - movdqa xmm15,XMMWORD[112+rdx] - lea rdx,[128+rdx] - pand xmm12,xmm4 - pand xmm13,xmm5 - pand xmm14,xmm6 - pand xmm15,xmm7 - por xmm8,xmm10 - por xmm9,xmm11 - por xmm8,xmm12 - por xmm9,xmm13 - por xmm8,xmm14 - por xmm9,xmm15 - - por xmm8,xmm9 - pshufd xmm9,xmm8,0x4e - por xmm8,xmm9 - movq QWORD[rcx],xmm8 - lea rcx,[8+rcx] - dec r9d - jnz NEAR $L$oop_gather - movaps xmm6,XMMWORD[rsp] - movaps xmm7,XMMWORD[16+rsp] - movaps xmm8,XMMWORD[32+rsp] - movaps xmm9,XMMWORD[48+rsp] - movaps xmm10,XMMWORD[64+rsp] - movaps xmm11,XMMWORD[80+rsp] - movaps xmm12,XMMWORD[96+rsp] - movaps xmm13,XMMWORD[112+rsp] - movaps xmm14,XMMWORD[128+rsp] - movaps xmm15,XMMWORD[144+rsp] - add rsp,0xa8 - DB 0F3h,0C3h ;repret -$L$SEH_end_rsaz_512_gather4: - - -ALIGN 64 -$L$inc: - DD 0,0,1,1 - DD 2,2,2,2 -EXTERN __imp_RtlVirtualUnwind - -ALIGN 16 -se_handler: - push rsi - push rdi - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - pushfq - sub rsp,64 - - mov rax,QWORD[120+r8] - mov rbx,QWORD[248+r8] - - mov rsi,QWORD[8+r9] - mov r11,QWORD[56+r9] - - mov r10d,DWORD[r11] - lea r10,[r10*1+rsi] - cmp rbx,r10 - jb NEAR $L$common_seh_tail - - mov rax,QWORD[152+r8] - - mov r10d,DWORD[4+r11] - lea r10,[r10*1+rsi] - cmp rbx,r10 - jae NEAR $L$common_seh_tail - - lea rax,[((128+24+48))+rax] - - lea rbx,[$L$mul_gather4_epilogue] - cmp rbx,r10 - jne NEAR $L$se_not_in_mul_gather4 - - lea rax,[176+rax] - - lea rsi,[((-48-168))+rax] - lea rdi,[512+r8] - mov ecx,20 - DD 0xa548f3fc - -$L$se_not_in_mul_gather4: - mov rbx,QWORD[((-8))+rax] - mov rbp,QWORD[((-16))+rax] - mov r12,QWORD[((-24))+rax] - mov r13,QWORD[((-32))+rax] - mov r14,QWORD[((-40))+rax] - mov r15,QWORD[((-48))+rax] - mov QWORD[144+r8],rbx - mov QWORD[160+r8],rbp - mov QWORD[216+r8],r12 - mov QWORD[224+r8],r13 - mov QWORD[232+r8],r14 - mov QWORD[240+r8],r15 - -$L$common_seh_tail: - mov rdi,QWORD[8+rax] - mov rsi,QWORD[16+rax] - mov QWORD[152+r8],rax - mov QWORD[168+r8],rsi - mov QWORD[176+r8],rdi - - mov rdi,QWORD[40+r9] - mov rsi,r8 - mov ecx,154 - DD 0xa548f3fc - - mov rsi,r9 - xor rcx,rcx - mov rdx,QWORD[8+rsi] - mov r8,QWORD[rsi] - mov r9,QWORD[16+rsi] - mov r10,QWORD[40+rsi] - lea r11,[56+rsi] - lea r12,[24+rsi] - mov QWORD[32+rsp],r10 - mov QWORD[40+rsp],r11 - mov QWORD[48+rsp],r12 - mov QWORD[56+rsp],rcx - call QWORD[__imp_RtlVirtualUnwind] - - mov eax,1 - add rsp,64 - popfq - pop r15 - pop r14 - pop r13 - pop r12 - pop rbp - pop rbx - pop rdi - pop rsi - DB 0F3h,0C3h ;repret - - -section .pdata rdata align=4 -ALIGN 4 - DD $L$SEH_begin_rsaz_512_sqr wrt ..imagebase - DD $L$SEH_end_rsaz_512_sqr wrt ..imagebase - DD $L$SEH_info_rsaz_512_sqr wrt ..imagebase - - DD $L$SEH_begin_rsaz_512_mul wrt ..imagebase - DD $L$SEH_end_rsaz_512_mul wrt ..imagebase - DD $L$SEH_info_rsaz_512_mul wrt ..imagebase - - DD $L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase - DD $L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase - DD $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase - - DD $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase - DD $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase - DD $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase - - DD $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase - DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase - DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase - - DD $L$SEH_begin_rsaz_512_gather4 wrt ..imagebase - DD $L$SEH_end_rsaz_512_gather4 wrt ..imagebase - DD $L$SEH_info_rsaz_512_gather4 wrt ..imagebase - -section .xdata rdata align=8 -ALIGN 8 -$L$SEH_info_rsaz_512_sqr: -DB 9,0,0,0 - DD se_handler wrt ..imagebase - DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase -$L$SEH_info_rsaz_512_mul: -DB 9,0,0,0 - DD se_handler wrt ..imagebase - DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase -$L$SEH_info_rsaz_512_mul_gather4: -DB 9,0,0,0 - DD se_handler wrt ..imagebase - DD $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt ..imagebase -$L$SEH_info_rsaz_512_mul_scatter4: -DB 9,0,0,0 - DD se_handler wrt ..imagebase - DD $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wrt ..imagebase -$L$SEH_info_rsaz_512_mul_by_one: -DB 9,0,0,0 - DD se_handler wrt ..imagebase - DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase -$L$SEH_info_rsaz_512_gather4: -DB 0x01,0x46,0x16,0x00 -DB 0x46,0xf8,0x09,0x00 -DB 0x3d,0xe8,0x08,0x00 -DB 0x34,0xd8,0x07,0x00 -DB 0x2e,0xc8,0x06,0x00 -DB 0x28,0xb8,0x05,0x00 -DB 0x22,0xa8,0x04,0x00 -DB 0x1c,0x98,0x03,0x00 -DB 0x16,0x88,0x02,0x00 -DB 0x10,0x78,0x01,0x00 -DB 0x0b,0x68,0x00,0x00 -DB 0x07,0x01,0x15,0x00 |