Diffstat (limited to 'mac-x86/crypto/bn/x86-mont.S')
-rw-r--r-- | mac-x86/crypto/bn/x86-mont.S | 126
1 file changed, 71 insertions, 55 deletions
diff --git a/mac-x86/crypto/bn/x86-mont.S b/mac-x86/crypto/bn/x86-mont.S
index 234034b0..5c13ca4d 100644
--- a/mac-x86/crypto/bn/x86-mont.S
+++ b/mac-x86/crypto/bn/x86-mont.S
@@ -16,39 +16,54 @@ L_bn_mul_mont_begin:
 	jl	L000just_leave
 	leal	20(%esp),%esi
 	leal	24(%esp),%edx
-	movl	%esp,%ebp
 	addl	$2,%edi
 	negl	%edi
-	leal	-32(%esp,%edi,4),%esp
+	leal	-32(%esp,%edi,4),%ebp
 	negl	%edi
-	movl	%esp,%eax
+	movl	%ebp,%eax
 	subl	%edx,%eax
 	andl	$2047,%eax
-	subl	%eax,%esp
-	xorl	%esp,%edx
+	subl	%eax,%ebp
+	xorl	%ebp,%edx
 	andl	$2048,%edx
 	xorl	$2048,%edx
-	subl	%edx,%esp
-	andl	$-64,%esp
+	subl	%edx,%ebp
+	andl	$-64,%ebp
+	movl	%esp,%eax
+	subl	%ebp,%eax
+	andl	$-4096,%eax
+	movl	%esp,%edx
+	leal	(%ebp,%eax,1),%esp
+	movl	(%esp),%eax
+	cmpl	%ebp,%esp
+	ja	L001page_walk
+	jmp	L002page_walk_done
+.align	4,0x90
+L001page_walk:
+	leal	-4096(%esp),%esp
+	movl	(%esp),%eax
+	cmpl	%ebp,%esp
+	ja	L001page_walk
+L002page_walk_done:
 	movl	(%esi),%eax
 	movl	4(%esi),%ebx
 	movl	8(%esi),%ecx
-	movl	12(%esi),%edx
+	movl	12(%esi),%ebp
 	movl	16(%esi),%esi
 	movl	(%esi),%esi
 	movl	%eax,4(%esp)
 	movl	%ebx,8(%esp)
 	movl	%ecx,12(%esp)
-	movl	%edx,16(%esp)
+	movl	%ebp,16(%esp)
 	movl	%esi,20(%esp)
 	leal	-3(%edi),%ebx
-	movl	%ebp,24(%esp)
-	call	L001PIC_me_up
-L001PIC_me_up:
+	movl	%edx,24(%esp)
+	call	L003PIC_me_up
+L003PIC_me_up:
 	popl	%eax
-	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax
+	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
 	btl	$26,(%eax)
-	jnc	L002non_sse2
+	jnc	L004non_sse2
 	movl	$-1,%eax
 	movd	%eax,%mm7
 	movl	8(%esp),%esi
@@ -72,7 +87,7 @@ L001PIC_me_up:
 	psrlq	$32,%mm3
 	incl	%ecx
 .align	4,0x90
-L0031st:
+L0051st:
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -87,7 +102,7 @@ L0031st:
 	psrlq	$32,%mm3
 	leal	1(%ecx),%ecx
 	cmpl	%ebx,%ecx
-	jl	L0031st
+	jl	L0051st
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -101,7 +116,7 @@ L0031st:
 	paddq	%mm2,%mm3
 	movq	%mm3,32(%esp,%ebx,4)
 	incl	%edx
-L004outer:
+L006outer:
 	xorl	%ecx,%ecx
 	movd	(%edi,%edx,4),%mm4
 	movd	(%esi),%mm5
@@ -123,7 +138,7 @@ L004outer:
 	paddq	%mm6,%mm2
 	incl	%ecx
 	decl	%ebx
-L005inner:
+L007inner:
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -140,7 +155,7 @@ L005inner:
 	paddq	%mm6,%mm2
 	decl	%ebx
 	leal	1(%ecx),%ecx
-	jnz	L005inner
+	jnz	L007inner
 	movl	%ecx,%ebx
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
@@ -158,11 +173,11 @@ L005inner:
 	movq	%mm3,32(%esp,%ebx,4)
 	leal	1(%edx),%edx
 	cmpl	%ebx,%edx
-	jle	L004outer
+	jle	L006outer
 	emms
-	jmp	L006common_tail
+	jmp	L008common_tail
 .align	4,0x90
-L002non_sse2:
+L004non_sse2:
 	movl	8(%esp),%esi
 	leal	1(%ebx),%ebp
 	movl	12(%esp),%edi
@@ -173,12 +188,12 @@ L002non_sse2:
 	leal	4(%edi,%ebx,4),%eax
 	orl	%edx,%ebp
 	movl	(%edi),%edi
-	jz	L007bn_sqr_mont
+	jz	L009bn_sqr_mont
 	movl	%eax,28(%esp)
 	movl	(%esi),%eax
 	xorl	%edx,%edx
 .align	4,0x90
-L008mull:
+L010mull:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	%eax,%ebp
@@ -187,7 +202,7 @@ L008mull:
 	movl	(%esi,%ecx,4),%eax
 	cmpl	%ebx,%ecx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	L008mull
+	jl	L010mull
 	movl	%edx,%ebp
 	mull	%edi
 	movl	20(%esp),%edi
@@ -205,9 +220,9 @@ L008mull:
 	movl	4(%esi),%eax
 	adcl	$0,%edx
 	incl	%ecx
-	jmp	L0092ndmadd
+	jmp	L0112ndmadd
 .align	4,0x90
-L0101stmadd:
+L0121stmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -218,7 +233,7 @@ L0101stmadd:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	L0101stmadd
+	jl	L0121stmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%eax
@@ -241,7 +256,7 @@ L0101stmadd:
 	adcl	$0,%edx
 	movl	$1,%ecx
 .align	4,0x90
-L0092ndmadd:
+L0112ndmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -252,7 +267,7 @@ L0092ndmadd:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,24(%esp,%ecx,4)
-	jl	L0092ndmadd
+	jl	L0112ndmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%ebp
@@ -268,16 +283,16 @@ L0092ndmadd:
 	movl	%edx,32(%esp,%ebx,4)
 	cmpl	28(%esp),%ecx
 	movl	%eax,36(%esp,%ebx,4)
-	je	L006common_tail
+	je	L008common_tail
 	movl	(%ecx),%edi
 	movl	8(%esp),%esi
 	movl	%ecx,12(%esp)
 	xorl	%ecx,%ecx
 	xorl	%edx,%edx
 	movl	(%esi),%eax
-	jmp	L0101stmadd
+	jmp	L0121stmadd
 .align	4,0x90
-L007bn_sqr_mont:
+L009bn_sqr_mont:
 	movl	%ebx,(%esp)
 	movl	%ecx,12(%esp)
 	movl	%edi,%eax
@@ -288,7 +303,7 @@ L007bn_sqr_mont:
 	andl	$1,%ebx
 	incl	%ecx
 .align	4,0x90
-L011sqr:
+L013sqr:
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -300,7 +315,7 @@ L011sqr:
 	cmpl	(%esp),%ecx
 	movl	%eax,%ebx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	L011sqr
+	jl	L013sqr
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -324,7 +339,7 @@ L011sqr:
 	movl	4(%esi),%eax
 	movl	$1,%ecx
 .align	4,0x90
-L0123rdmadd:
+L0143rdmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -343,7 +358,7 @@ L0123rdmadd:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,24(%esp,%ecx,4)
-	jl	L0123rdmadd
+	jl	L0143rdmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%ebp
@@ -359,7 +374,7 @@ L0123rdmadd:
 	movl	%edx,32(%esp,%ebx,4)
 	cmpl	%ebx,%ecx
 	movl	%eax,36(%esp,%ebx,4)
-	je	L006common_tail
+	je	L008common_tail
 	movl	4(%esi,%ecx,4),%edi
 	leal	1(%ecx),%ecx
 	movl	%edi,%eax
@@ -371,12 +386,12 @@ L0123rdmadd:
 	xorl	%ebp,%ebp
 	cmpl	%ebx,%ecx
 	leal	1(%ecx),%ecx
-	je	L013sqrlast
+	je	L015sqrlast
 	movl	%edx,%ebx
 	shrl	$1,%edx
 	andl	$1,%ebx
 .align	4,0x90
-L014sqradd:
+L016sqradd:
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -392,13 +407,13 @@ L014sqradd:
 	cmpl	(%esp),%ecx
 	movl	%ebp,28(%esp,%ecx,4)
 	movl	%eax,%ebx
-	jle	L014sqradd
+	jle	L016sqradd
 	movl	%edx,%ebp
 	addl	%edx,%edx
 	shrl	$31,%ebp
 	addl	%ebx,%edx
 	adcl	$0,%ebp
-L013sqrlast:
+L015sqrlast:
 	movl	20(%esp),%edi
 	movl	16(%esp),%esi
 	imull	32(%esp),%edi
@@ -413,9 +428,9 @@ L013sqrlast:
 	adcl	$0,%edx
 	movl	$1,%ecx
 	movl	4(%esi),%eax
-	jmp	L0123rdmadd
+	jmp	L0143rdmadd
 .align	4,0x90
-L006common_tail:
+L008common_tail:
 	movl	16(%esp),%ebp
 	movl	4(%esp),%edi
 	leal	32(%esp),%esi
@@ -423,25 +438,26 @@ L006common_tail:
 	movl	%ebx,%ecx
 	xorl	%edx,%edx
 .align	4,0x90
-L015sub:
+L017sub:
 	sbbl	(%ebp,%edx,4),%eax
 	movl	%eax,(%edi,%edx,4)
 	decl	%ecx
 	movl	4(%esi,%edx,4),%eax
 	leal	1(%edx),%edx
-	jge	L015sub
+	jge	L017sub
 	sbbl	$0,%eax
+	andl	%eax,%esi
+	notl	%eax
+	movl	%edi,%ebp
+	andl	%eax,%ebp
+	orl	%ebp,%esi
 .align	4,0x90
-L016copy:
-	movl	(%esi,%ebx,4),%edx
-	movl	(%edi,%ebx,4),%ebp
-	xorl	%ebp,%edx
-	andl	%eax,%edx
-	xorl	%ebp,%edx
-	movl	%ecx,(%esi,%ebx,4)
-	movl	%edx,(%edi,%ebx,4)
+L018copy:
+	movl	(%esi,%ebx,4),%eax
+	movl	%eax,(%edi,%ebx,4)
+	movl	%ecx,32(%esp,%ebx,4)
 	decl	%ebx
-	jge	L016copy
+	jge	L018copy
 	movl	24(%esp),%esp
 	movl	$1,%eax
 L000just_leave:
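
Aside from the label renumbering that follows from inserting new code, this diff makes two substantive changes. The first is the L001page_walk/L002page_walk_done loop added at the top of bn_mul_mont: instead of dropping %esp straight to the bottom of the new scratch frame (which, for a large modulus, can jump past the OS guard page in one step), the code now lowers %esp 4096 bytes at a time and reads a word at each step, so every page of the new frame is faulted in order. The C sketch below shows the same idea; it is illustrative only, and the helper name, signature, and PAGE_SIZE constant are assumptions rather than BoringSSL code.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical illustration of the L001page_walk technique: touch one
 * word per 4096-byte page, walking downward from the old stack top to
 * the bottom of the new frame, so the guard page is hit one page at a
 * time instead of being skipped over. */
#define PAGE_SIZE 4096u

static void probe_stack_frame(const volatile uint8_t *old_top,
                              size_t frame_size) {
  for (size_t off = 0; off <= frame_size; off += PAGE_SIZE) {
    (void)old_top[-(ptrdiff_t)off];  /* analogous to movl (%esp),%eax */
  }
}

A compiler doing a large static stack allocation emits the same pattern (e.g. MSVC's __chkstk); here the generated assembly has to do it by hand because the scratch size depends on the run-time operand length in %edi.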
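The second change is in the common tail. Previously, after the L015sub loop computed rp = tp - np, the L016copy loop selected each output word with a per-word XOR/AND/XOR dance keyed on the final borrow mask. The new code applies that mask once, up front, to pick the copy source pointer with andl/notl/orl (the scratch buffer tp when the subtraction underflowed, rp otherwise, since rp then already holds the reduced value), and then runs a plain copy loop that also scrubs the scratch words. A hedged C sketch of this select-then-copy pattern follows; the function and variable names are illustrative, not BoringSSL's, and the real code fuses the subtraction with the carry left over from the reduction.

#include <stddef.h>
#include <stdint.h>

typedef uint32_t word;

/* Sketch of the L017sub/L018copy logic under simplified assumptions:
 * tp holds the pre-reduction result, np the modulus, rp the output. */
static void mont_final_reduce(word *rp, word *tp, const word *np,
                              size_t num) {
  word borrow = 0;
  /* rp = tp - np, propagating the borrow (the L017sub loop). */
  for (size_t i = 0; i < num; i++) {
    word t = tp[i], n = np[i];
    rp[i] = t - n - borrow;
    borrow = (t < n) | ((t == n) & borrow);
  }
  /* All-ones when the subtraction borrowed (tp < np), else zero. */
  uintptr_t mask = (uintptr_t)0 - borrow;
  /* Branch-free source select, as the asm does with andl/notl/orl:
   * tp if it was already fully reduced, rp (a self-copy) otherwise. */
  const word *src = (const word *)(((uintptr_t)tp & mask) |
                                   ((uintptr_t)rp & ~mask));
  for (size_t i = 0; i < num; i++) {
    rp[i] = src[i];    /* plain copy, as in L018copy */
    tp[i] = (word)-1;  /* overwrite the scratch with a fill word, as the
                          new loop does with the %ecx left by L017sub */
  }
}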