diff options
Diffstat (limited to 'win-x86/crypto/bn/x86-mont.asm')
-rw-r--r-- | win-x86/crypto/bn/x86-mont.asm | 120 |
1 files changed, 68 insertions, 52 deletions
diff --git a/win-x86/crypto/bn/x86-mont.asm b/win-x86/crypto/bn/x86-mont.asm index de7b9499..b1a4d594 100644 --- a/win-x86/crypto/bn/x86-mont.asm +++ b/win-x86/crypto/bn/x86-mont.asm @@ -29,36 +29,51 @@ L$_bn_mul_mont_begin: jl NEAR L$000just_leave lea esi,[20+esp] lea edx,[24+esp] - mov ebp,esp add edi,2 neg edi - lea esp,[edi*4+esp-32] + lea ebp,[edi*4+esp-32] neg edi - mov eax,esp + mov eax,ebp sub eax,edx and eax,2047 - sub esp,eax - xor edx,esp + sub ebp,eax + xor edx,ebp and edx,2048 xor edx,2048 - sub esp,edx - and esp,-64 + sub ebp,edx + and ebp,-64 + mov eax,esp + sub eax,ebp + and eax,-4096 + mov edx,esp + lea esp,[eax*1+ebp] + mov eax,DWORD [esp] + cmp esp,ebp + ja NEAR L$001page_walk + jmp NEAR L$002page_walk_done +align 16 +L$001page_walk: + lea esp,[esp-4096] + mov eax,DWORD [esp] + cmp esp,ebp + ja NEAR L$001page_walk +L$002page_walk_done: mov eax,DWORD [esi] mov ebx,DWORD [4+esi] mov ecx,DWORD [8+esi] - mov edx,DWORD [12+esi] + mov ebp,DWORD [12+esi] mov esi,DWORD [16+esi] mov esi,DWORD [esi] mov DWORD [4+esp],eax mov DWORD [8+esp],ebx mov DWORD [12+esp],ecx - mov DWORD [16+esp],edx + mov DWORD [16+esp],ebp mov DWORD [20+esp],esi lea ebx,[edi-3] - mov DWORD [24+esp],ebp + mov DWORD [24+esp],edx lea eax,[_OPENSSL_ia32cap_P] bt DWORD [eax],26 - jnc NEAR L$001non_sse2 + jnc NEAR L$003non_sse2 mov eax,-1 movd mm7,eax mov esi,DWORD [8+esp] @@ -82,7 +97,7 @@ L$_bn_mul_mont_begin: psrlq mm3,32 inc ecx align 16 -L$0021st: +L$0041st: pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 @@ -97,7 +112,7 @@ L$0021st: psrlq mm3,32 lea ecx,[1+ecx] cmp ecx,ebx - jl NEAR L$0021st + jl NEAR L$0041st pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 @@ -111,7 +126,7 @@ L$0021st: paddq mm3,mm2 movq [32+ebx*4+esp],mm3 inc edx -L$003outer: +L$005outer: xor ecx,ecx movd mm4,DWORD [edx*4+edi] movd mm5,DWORD [esi] @@ -133,7 +148,7 @@ L$003outer: paddq mm2,mm6 inc ecx dec ebx -L$004inner: +L$006inner: pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 @@ -150,7 +165,7 @@ L$004inner: paddq mm2,mm6 dec ebx lea ecx,[1+ecx] - jnz NEAR L$004inner + jnz NEAR L$006inner mov ebx,ecx pmuludq mm0,mm4 pmuludq mm1,mm5 @@ -168,11 +183,11 @@ L$004inner: movq [32+ebx*4+esp],mm3 lea edx,[1+edx] cmp edx,ebx - jle NEAR L$003outer + jle NEAR L$005outer emms - jmp NEAR L$005common_tail + jmp NEAR L$007common_tail align 16 -L$001non_sse2: +L$003non_sse2: mov esi,DWORD [8+esp] lea ebp,[1+ebx] mov edi,DWORD [12+esp] @@ -183,12 +198,12 @@ L$001non_sse2: lea eax,[4+ebx*4+edi] or ebp,edx mov edi,DWORD [edi] - jz NEAR L$006bn_sqr_mont + jz NEAR L$008bn_sqr_mont mov DWORD [28+esp],eax mov eax,DWORD [esi] xor edx,edx align 16 -L$007mull: +L$009mull: mov ebp,edx mul edi add ebp,eax @@ -197,7 +212,7 @@ L$007mull: mov eax,DWORD [ecx*4+esi] cmp ecx,ebx mov DWORD [28+ecx*4+esp],ebp - jl NEAR L$007mull + jl NEAR L$009mull mov ebp,edx mul edi mov edi,DWORD [20+esp] @@ -215,9 +230,9 @@ L$007mull: mov eax,DWORD [4+esi] adc edx,0 inc ecx - jmp NEAR L$0082ndmadd + jmp NEAR L$0102ndmadd align 16 -L$0091stmadd: +L$0111stmadd: mov ebp,edx mul edi add ebp,DWORD [32+ecx*4+esp] @@ -228,7 +243,7 @@ L$0091stmadd: adc edx,0 cmp ecx,ebx mov DWORD [28+ecx*4+esp],ebp - jl NEAR L$0091stmadd + jl NEAR L$0111stmadd mov ebp,edx mul edi add eax,DWORD [32+ebx*4+esp] @@ -251,7 +266,7 @@ L$0091stmadd: adc edx,0 mov ecx,1 align 16 -L$0082ndmadd: +L$0102ndmadd: mov ebp,edx mul edi add ebp,DWORD [32+ecx*4+esp] @@ -262,7 +277,7 @@ L$0082ndmadd: adc edx,0 cmp ecx,ebx mov DWORD [24+ecx*4+esp],ebp - jl NEAR L$0082ndmadd + jl NEAR L$0102ndmadd mov ebp,edx mul edi add ebp,DWORD [32+ebx*4+esp] @@ -278,16 +293,16 @@ L$0082ndmadd: mov DWORD [32+ebx*4+esp],edx cmp ecx,DWORD [28+esp] mov DWORD [36+ebx*4+esp],eax - je NEAR L$005common_tail + je NEAR L$007common_tail mov edi,DWORD [ecx] mov esi,DWORD [8+esp] mov DWORD [12+esp],ecx xor ecx,ecx xor edx,edx mov eax,DWORD [esi] - jmp NEAR L$0091stmadd + jmp NEAR L$0111stmadd align 16 -L$006bn_sqr_mont: +L$008bn_sqr_mont: mov DWORD [esp],ebx mov DWORD [12+esp],ecx mov eax,edi @@ -298,7 +313,7 @@ L$006bn_sqr_mont: and ebx,1 inc ecx align 16 -L$010sqr: +L$012sqr: mov eax,DWORD [ecx*4+esi] mov ebp,edx mul edi @@ -310,7 +325,7 @@ L$010sqr: cmp ecx,DWORD [esp] mov ebx,eax mov DWORD [28+ecx*4+esp],ebp - jl NEAR L$010sqr + jl NEAR L$012sqr mov eax,DWORD [ecx*4+esi] mov ebp,edx mul edi @@ -334,7 +349,7 @@ L$010sqr: mov eax,DWORD [4+esi] mov ecx,1 align 16 -L$0113rdmadd: +L$0133rdmadd: mov ebp,edx mul edi add ebp,DWORD [32+ecx*4+esp] @@ -353,7 +368,7 @@ L$0113rdmadd: adc edx,0 cmp ecx,ebx mov DWORD [24+ecx*4+esp],ebp - jl NEAR L$0113rdmadd + jl NEAR L$0133rdmadd mov ebp,edx mul edi add ebp,DWORD [32+ebx*4+esp] @@ -369,7 +384,7 @@ L$0113rdmadd: mov DWORD [32+ebx*4+esp],edx cmp ecx,ebx mov DWORD [36+ebx*4+esp],eax - je NEAR L$005common_tail + je NEAR L$007common_tail mov edi,DWORD [4+ecx*4+esi] lea ecx,[1+ecx] mov eax,edi @@ -381,12 +396,12 @@ L$0113rdmadd: xor ebp,ebp cmp ecx,ebx lea ecx,[1+ecx] - je NEAR L$012sqrlast + je NEAR L$014sqrlast mov ebx,edx shr edx,1 and ebx,1 align 16 -L$013sqradd: +L$015sqradd: mov eax,DWORD [ecx*4+esi] mov ebp,edx mul edi @@ -402,13 +417,13 @@ L$013sqradd: cmp ecx,DWORD [esp] mov DWORD [28+ecx*4+esp],ebp mov ebx,eax - jle NEAR L$013sqradd + jle NEAR L$015sqradd mov ebp,edx add edx,edx shr ebp,31 add edx,ebx adc ebp,0 -L$012sqrlast: +L$014sqrlast: mov edi,DWORD [20+esp] mov esi,DWORD [16+esp] imul edi,DWORD [32+esp] @@ -423,9 +438,9 @@ L$012sqrlast: adc edx,0 mov ecx,1 mov eax,DWORD [4+esi] - jmp NEAR L$0113rdmadd + jmp NEAR L$0133rdmadd align 16 -L$005common_tail: +L$007common_tail: mov ebp,DWORD [16+esp] mov edi,DWORD [4+esp] lea esi,[32+esp] @@ -433,25 +448,26 @@ L$005common_tail: mov ecx,ebx xor edx,edx align 16 -L$014sub: +L$016sub: sbb eax,DWORD [edx*4+ebp] mov DWORD [edx*4+edi],eax dec ecx mov eax,DWORD [4+edx*4+esi] lea edx,[1+edx] - jge NEAR L$014sub + jge NEAR L$016sub sbb eax,0 + and esi,eax + not eax + mov ebp,edi + and ebp,eax + or esi,ebp align 16 -L$015copy: - mov edx,DWORD [ebx*4+esi] - mov ebp,DWORD [ebx*4+edi] - xor edx,ebp - and edx,eax - xor edx,ebp - mov DWORD [ebx*4+esi],ecx - mov DWORD [ebx*4+edi],edx +L$017copy: + mov eax,DWORD [ebx*4+esi] + mov DWORD [ebx*4+edi],eax + mov DWORD [32+ebx*4+esp],ecx dec ebx - jge NEAR L$015copy + jge NEAR L$017copy mov esp,DWORD [24+esp] mov eax,1 L$000just_leave: |