diff options
author | David Benjamin <davidben@google.com> | 2016-04-22 15:02:23 -0400 |
---|---|---|
committer | David Benjamin <davidben@google.com> | 2016-04-29 16:36:16 -0400 |
commit | 4969cc9b0ab2905ec478277f50ed3849b37a6c6b (patch) | |
tree | 552fde383dce1efd213ae145fff808806d76a225 /linux-x86/crypto | |
parent | 09f2501f7faf115dc26e0c2310b3ea8c97f66007 (diff) | |
download | boringssl-4969cc9b0ab2905ec478277f50ed3849b37a6c6b.tar.gz |
external/boringssl: Sync to d18cb77.
This includes the following changes which are far too many to list here:
https://boringssl.googlesource.com/boringssl/+log/7b8b9c17db93ea5287575b437c77fb36eeb81b31..d18cb77864dcc4b5c7cb08c2331008c01165f34f
This also retires one function from android_compat_hacks.c which is no longer
necessary.
Change-Id: Ie00536d7ad815464b2b031f7bcd1b683e12c1623
Diffstat (limited to 'linux-x86/crypto')
-rw-r--r-- | linux-x86/crypto/chacha/chacha-x86.S | 989 | ||||
-rw-r--r-- | linux-x86/crypto/rc4/rc4-586.S | 35 |
2 files changed, 989 insertions, 35 deletions
diff --git a/linux-x86/crypto/chacha/chacha-x86.S b/linux-x86/crypto/chacha/chacha-x86.S new file mode 100644 index 00000000..f28459e9 --- /dev/null +++ b/linux-x86/crypto/chacha/chacha-x86.S @@ -0,0 +1,989 @@ +#if defined(__i386__) +.file "chacha-x86.S" +.text +.globl ChaCha20_ctr32 +.hidden ChaCha20_ctr32 +.type ChaCha20_ctr32,@function +.align 16 +ChaCha20_ctr32: +.L_ChaCha20_ctr32_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %eax,%eax + cmpl 28(%esp),%eax + je .L000no_data + call .Lpic_point +.Lpic_point: + popl %eax + leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp + testl $16777216,(%ebp) + jz .L001x86 + testl $512,4(%ebp) + jz .L001x86 + jmp .Lssse3_shortcut +.L001x86: + movl 32(%esp),%esi + movl 36(%esp),%edi + subl $132,%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,80(%esp) + movl %ebx,84(%esp) + movl %ecx,88(%esp) + movl %edx,92(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + movl %eax,96(%esp) + movl %ebx,100(%esp) + movl %ecx,104(%esp) + movl %edx,108(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + subl $1,%eax + movl %eax,112(%esp) + movl %ebx,116(%esp) + movl %ecx,120(%esp) + movl %edx,124(%esp) + jmp .L002entry +.align 16 +.L003outer_loop: + movl %ebx,156(%esp) + movl %eax,152(%esp) + movl %ecx,160(%esp) +.L002entry: + movl $1634760805,%eax + movl $857760878,4(%esp) + movl $2036477234,8(%esp) + movl $1797285236,12(%esp) + movl 84(%esp),%ebx + movl 88(%esp),%ebp + movl 104(%esp),%ecx + movl 108(%esp),%esi + movl 116(%esp),%edx + movl 120(%esp),%edi + movl %ebx,20(%esp) + movl %ebp,24(%esp) + movl %ecx,40(%esp) + movl %esi,44(%esp) + movl %edx,52(%esp) + movl %edi,56(%esp) + movl 92(%esp),%ebx + movl 124(%esp),%edi + movl 112(%esp),%edx + movl 80(%esp),%ebp + movl 96(%esp),%ecx + movl 100(%esp),%esi + addl $1,%edx + movl %ebx,28(%esp) + movl %edi,60(%esp) + movl %edx,112(%esp) + movl $10,%ebx + jmp .L004loop +.align 16 +.L004loop: + addl %ebp,%eax + movl %ebx,128(%esp) + movl %ebp,%ebx + xorl %eax,%edx + roll $16,%edx + addl %edx,%ecx + xorl %ecx,%ebx + movl 52(%esp),%edi + roll $12,%ebx + movl 20(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,(%esp) + roll $8,%edx + movl 4(%esp),%eax + addl %edx,%ecx + movl %edx,48(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + movl %ecx,32(%esp) + roll $16,%edi + movl %ebx,16(%esp) + addl %edi,%esi + movl 40(%esp),%ecx + xorl %esi,%ebp + movl 56(%esp),%edx + roll $12,%ebp + movl 24(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,4(%esp) + roll $8,%edi + movl 8(%esp),%eax + addl %edi,%esi + movl %edi,52(%esp) + xorl %esi,%ebp + addl %ebx,%eax + roll $7,%ebp + xorl %eax,%edx + movl %esi,36(%esp) + roll $16,%edx + movl %ebp,20(%esp) + addl %edx,%ecx + movl 44(%esp),%esi + xorl %ecx,%ebx + movl 60(%esp),%edi + roll $12,%ebx + movl 28(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,8(%esp) + roll $8,%edx + movl 12(%esp),%eax + addl %edx,%ecx + movl %edx,56(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + roll $16,%edi + movl %ebx,24(%esp) + addl %edi,%esi + xorl %esi,%ebp + roll $12,%ebp + movl 20(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,12(%esp) + roll $8,%edi + movl (%esp),%eax + addl %edi,%esi + movl %edi,%edx + xorl %esi,%ebp + addl %ebx,%eax + roll $7,%ebp + xorl %eax,%edx + roll $16,%edx + movl %ebp,28(%esp) + addl %edx,%ecx + xorl %ecx,%ebx + movl 48(%esp),%edi + roll $12,%ebx + movl 24(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,(%esp) + roll $8,%edx + movl 4(%esp),%eax + addl %edx,%ecx + movl %edx,60(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + movl %ecx,40(%esp) + roll $16,%edi + movl %ebx,20(%esp) + addl %edi,%esi + movl 32(%esp),%ecx + xorl %esi,%ebp + movl 52(%esp),%edx + roll $12,%ebp + movl 28(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,4(%esp) + roll $8,%edi + movl 8(%esp),%eax + addl %edi,%esi + movl %edi,48(%esp) + xorl %esi,%ebp + addl %ebx,%eax + roll $7,%ebp + xorl %eax,%edx + movl %esi,44(%esp) + roll $16,%edx + movl %ebp,24(%esp) + addl %edx,%ecx + movl 36(%esp),%esi + xorl %ecx,%ebx + movl 56(%esp),%edi + roll $12,%ebx + movl 16(%esp),%ebp + addl %ebx,%eax + xorl %eax,%edx + movl %eax,8(%esp) + roll $8,%edx + movl 12(%esp),%eax + addl %edx,%ecx + movl %edx,52(%esp) + xorl %ecx,%ebx + addl %ebp,%eax + roll $7,%ebx + xorl %eax,%edi + roll $16,%edi + movl %ebx,28(%esp) + addl %edi,%esi + xorl %esi,%ebp + movl 48(%esp),%edx + roll $12,%ebp + movl 128(%esp),%ebx + addl %ebp,%eax + xorl %eax,%edi + movl %eax,12(%esp) + roll $8,%edi + movl (%esp),%eax + addl %edi,%esi + movl %edi,56(%esp) + xorl %esi,%ebp + roll $7,%ebp + decl %ebx + jnz .L004loop + movl 160(%esp),%ebx + addl $1634760805,%eax + addl 80(%esp),%ebp + addl 96(%esp),%ecx + addl 100(%esp),%esi + cmpl $64,%ebx + jb .L005tail + movl 156(%esp),%ebx + addl 112(%esp),%edx + addl 120(%esp),%edi + xorl (%ebx),%eax + xorl 16(%ebx),%ebp + movl %eax,(%esp) + movl 152(%esp),%eax + xorl 32(%ebx),%ecx + xorl 36(%ebx),%esi + xorl 48(%ebx),%edx + xorl 56(%ebx),%edi + movl %ebp,16(%esp) + movl (%esp),%ebp + movl %ecx,32(%esp) + movl %esi,36(%esp) + movl %edx,48(%esp) + movl %edi,56(%esp) + movl %ebp,(%eax) + movl 4(%esp),%ebp + movl 8(%esp),%ecx + movl 12(%esp),%esi + movl 20(%esp),%edx + movl 24(%esp),%edi + addl $857760878,%ebp + addl $2036477234,%ecx + addl $1797285236,%esi + addl 84(%esp),%edx + addl 88(%esp),%edi + xorl 4(%ebx),%ebp + xorl 8(%ebx),%ecx + xorl 12(%ebx),%esi + xorl 20(%ebx),%edx + xorl 24(%ebx),%edi + movl %ebp,4(%eax) + movl 16(%esp),%ebp + movl %ecx,8(%eax) + movl %esi,12(%eax) + movl %ebp,16(%eax) + movl %edx,20(%eax) + movl %edi,24(%eax) + movl 28(%esp),%ecx + movl 32(%esp),%edx + movl 36(%esp),%edi + addl 92(%esp),%ecx + movl 40(%esp),%ebp + xorl 28(%ebx),%ecx + movl 44(%esp),%esi + movl %ecx,28(%eax) + movl %edx,32(%eax) + movl %edi,36(%eax) + addl 104(%esp),%ebp + addl 108(%esp),%esi + xorl 40(%ebx),%ebp + xorl 44(%ebx),%esi + movl %ebp,40(%eax) + movl %esi,44(%eax) + movl 48(%esp),%ecx + movl 56(%esp),%esi + movl 52(%esp),%edx + movl 60(%esp),%edi + addl 116(%esp),%edx + addl 124(%esp),%edi + xorl 52(%ebx),%edx + xorl 60(%ebx),%edi + leal 64(%ebx),%ebx + movl %ecx,48(%eax) + movl 160(%esp),%ecx + movl %edx,52(%eax) + movl %esi,56(%eax) + movl %edi,60(%eax) + leal 64(%eax),%eax + subl $64,%ecx + jnz .L003outer_loop + jmp .L006done +.L005tail: + addl 112(%esp),%edx + addl 120(%esp),%edi + movl %eax,(%esp) + movl %ebp,16(%esp) + movl %ecx,32(%esp) + movl %esi,36(%esp) + movl %edx,48(%esp) + movl %edi,56(%esp) + movl 4(%esp),%ebp + movl 8(%esp),%ecx + movl 12(%esp),%esi + movl 20(%esp),%edx + movl 24(%esp),%edi + addl $857760878,%ebp + addl $2036477234,%ecx + addl $1797285236,%esi + addl 84(%esp),%edx + addl 88(%esp),%edi + movl %ebp,4(%esp) + movl %ecx,8(%esp) + movl %esi,12(%esp) + movl %edx,20(%esp) + movl %edi,24(%esp) + movl 28(%esp),%ebp + movl 40(%esp),%ecx + movl 44(%esp),%esi + movl 52(%esp),%edx + movl 60(%esp),%edi + addl 92(%esp),%ebp + addl 104(%esp),%ecx + addl 108(%esp),%esi + addl 116(%esp),%edx + addl 124(%esp),%edi + movl %ebp,28(%esp) + movl 156(%esp),%ebp + movl %ecx,40(%esp) + movl 152(%esp),%ecx + movl %esi,44(%esp) + xorl %esi,%esi + movl %edx,52(%esp) + movl %edi,60(%esp) + xorl %eax,%eax + xorl %edx,%edx +.L007tail_loop: + movb (%esi,%ebp,1),%al + movb (%esp,%esi,1),%dl + leal 1(%esi),%esi + xorb %dl,%al + movb %al,-1(%ecx,%esi,1) + decl %ebx + jnz .L007tail_loop +.L006done: + addl $132,%esp +.L000no_data: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin +.globl ChaCha20_ssse3 +.hidden ChaCha20_ssse3 +.type ChaCha20_ssse3,@function +.align 16 +ChaCha20_ssse3: +.L_ChaCha20_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi +.Lssse3_shortcut: + movl 20(%esp),%edi + movl 24(%esp),%esi + movl 28(%esp),%ecx + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $524,%esp + andl $-64,%esp + movl %ebp,512(%esp) + leal .Lssse3_data-.Lpic_point(%eax),%eax + movdqu (%ebx),%xmm3 + cmpl $256,%ecx + jb .L0081x + movl %edx,516(%esp) + movl %ebx,520(%esp) + subl $256,%ecx + leal 384(%esp),%ebp + movdqu (%edx),%xmm7 + pshufd $0,%xmm3,%xmm0 + pshufd $85,%xmm3,%xmm1 + pshufd $170,%xmm3,%xmm2 + pshufd $255,%xmm3,%xmm3 + paddd 48(%eax),%xmm0 + pshufd $0,%xmm7,%xmm4 + pshufd $85,%xmm7,%xmm5 + psubd 64(%eax),%xmm0 + pshufd $170,%xmm7,%xmm6 + pshufd $255,%xmm7,%xmm7 + movdqa %xmm0,64(%ebp) + movdqa %xmm1,80(%ebp) + movdqa %xmm2,96(%ebp) + movdqa %xmm3,112(%ebp) + movdqu 16(%edx),%xmm3 + movdqa %xmm4,-64(%ebp) + movdqa %xmm5,-48(%ebp) + movdqa %xmm6,-32(%ebp) + movdqa %xmm7,-16(%ebp) + movdqa 32(%eax),%xmm7 + leal 128(%esp),%ebx + pshufd $0,%xmm3,%xmm0 + pshufd $85,%xmm3,%xmm1 + pshufd $170,%xmm3,%xmm2 + pshufd $255,%xmm3,%xmm3 + pshufd $0,%xmm7,%xmm4 + pshufd $85,%xmm7,%xmm5 + pshufd $170,%xmm7,%xmm6 + pshufd $255,%xmm7,%xmm7 + movdqa %xmm0,(%ebp) + movdqa %xmm1,16(%ebp) + movdqa %xmm2,32(%ebp) + movdqa %xmm3,48(%ebp) + movdqa %xmm4,-128(%ebp) + movdqa %xmm5,-112(%ebp) + movdqa %xmm6,-96(%ebp) + movdqa %xmm7,-80(%ebp) + leal 128(%esi),%esi + leal 128(%edi),%edi + jmp .L009outer_loop +.align 16 +.L009outer_loop: + movdqa -112(%ebp),%xmm1 + movdqa -96(%ebp),%xmm2 + movdqa -80(%ebp),%xmm3 + movdqa -48(%ebp),%xmm5 + movdqa -32(%ebp),%xmm6 + movdqa -16(%ebp),%xmm7 + movdqa %xmm1,-112(%ebx) + movdqa %xmm2,-96(%ebx) + movdqa %xmm3,-80(%ebx) + movdqa %xmm5,-48(%ebx) + movdqa %xmm6,-32(%ebx) + movdqa %xmm7,-16(%ebx) + movdqa 32(%ebp),%xmm2 + movdqa 48(%ebp),%xmm3 + movdqa 64(%ebp),%xmm4 + movdqa 80(%ebp),%xmm5 + movdqa 96(%ebp),%xmm6 + movdqa 112(%ebp),%xmm7 + paddd 64(%eax),%xmm4 + movdqa %xmm2,32(%ebx) + movdqa %xmm3,48(%ebx) + movdqa %xmm4,64(%ebx) + movdqa %xmm5,80(%ebx) + movdqa %xmm6,96(%ebx) + movdqa %xmm7,112(%ebx) + movdqa %xmm4,64(%ebp) + movdqa -128(%ebp),%xmm0 + movdqa %xmm4,%xmm6 + movdqa -64(%ebp),%xmm3 + movdqa (%ebp),%xmm4 + movdqa 16(%ebp),%xmm5 + movl $10,%edx + nop +.align 16 +.L010loop: + paddd %xmm3,%xmm0 + movdqa %xmm3,%xmm2 + pxor %xmm0,%xmm6 + pshufb (%eax),%xmm6 + paddd %xmm6,%xmm4 + pxor %xmm4,%xmm2 + movdqa -48(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -112(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 80(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-128(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,64(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + movdqa %xmm4,(%ebx) + pshufb (%eax),%xmm7 + movdqa %xmm2,-64(%ebx) + paddd %xmm7,%xmm5 + movdqa 32(%ebx),%xmm4 + pxor %xmm5,%xmm3 + movdqa -32(%ebx),%xmm2 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -96(%ebx),%xmm0 + paddd %xmm3,%xmm1 + movdqa 96(%ebx),%xmm6 + pxor %xmm1,%xmm7 + movdqa %xmm1,-112(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,80(%ebx) + pxor %xmm5,%xmm3 + paddd %xmm2,%xmm0 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + pxor %xmm0,%xmm6 + por %xmm1,%xmm3 + movdqa %xmm5,16(%ebx) + pshufb (%eax),%xmm6 + movdqa %xmm3,-48(%ebx) + paddd %xmm6,%xmm4 + movdqa 48(%ebx),%xmm5 + pxor %xmm4,%xmm2 + movdqa -16(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -80(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 112(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-96(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,96(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + pshufb (%eax),%xmm7 + movdqa %xmm2,-32(%ebx) + paddd %xmm7,%xmm5 + pxor %xmm5,%xmm3 + movdqa -48(%ebx),%xmm2 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -128(%ebx),%xmm0 + paddd %xmm3,%xmm1 + pxor %xmm1,%xmm7 + movdqa %xmm1,-80(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,%xmm6 + pxor %xmm5,%xmm3 + paddd %xmm2,%xmm0 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + pxor %xmm0,%xmm6 + por %xmm1,%xmm3 + pshufb (%eax),%xmm6 + movdqa %xmm3,-16(%ebx) + paddd %xmm6,%xmm4 + pxor %xmm4,%xmm2 + movdqa -32(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -112(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 64(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-128(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,112(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + movdqa %xmm4,32(%ebx) + pshufb (%eax),%xmm7 + movdqa %xmm2,-48(%ebx) + paddd %xmm7,%xmm5 + movdqa (%ebx),%xmm4 + pxor %xmm5,%xmm3 + movdqa -16(%ebx),%xmm2 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -96(%ebx),%xmm0 + paddd %xmm3,%xmm1 + movdqa 80(%ebx),%xmm6 + pxor %xmm1,%xmm7 + movdqa %xmm1,-112(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,64(%ebx) + pxor %xmm5,%xmm3 + paddd %xmm2,%xmm0 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + pxor %xmm0,%xmm6 + por %xmm1,%xmm3 + movdqa %xmm5,48(%ebx) + pshufb (%eax),%xmm6 + movdqa %xmm3,-32(%ebx) + paddd %xmm6,%xmm4 + movdqa 16(%ebx),%xmm5 + pxor %xmm4,%xmm2 + movdqa -64(%ebx),%xmm3 + movdqa %xmm2,%xmm1 + pslld $12,%xmm2 + psrld $20,%xmm1 + por %xmm1,%xmm2 + movdqa -80(%ebx),%xmm1 + paddd %xmm2,%xmm0 + movdqa 96(%ebx),%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm0,-96(%ebx) + pshufb 16(%eax),%xmm6 + paddd %xmm6,%xmm4 + movdqa %xmm6,80(%ebx) + pxor %xmm4,%xmm2 + paddd %xmm3,%xmm1 + movdqa %xmm2,%xmm0 + pslld $7,%xmm2 + psrld $25,%xmm0 + pxor %xmm1,%xmm7 + por %xmm0,%xmm2 + pshufb (%eax),%xmm7 + movdqa %xmm2,-16(%ebx) + paddd %xmm7,%xmm5 + pxor %xmm5,%xmm3 + movdqa %xmm3,%xmm0 + pslld $12,%xmm3 + psrld $20,%xmm0 + por %xmm0,%xmm3 + movdqa -128(%ebx),%xmm0 + paddd %xmm3,%xmm1 + movdqa 64(%ebx),%xmm6 + pxor %xmm1,%xmm7 + movdqa %xmm1,-80(%ebx) + pshufb 16(%eax),%xmm7 + paddd %xmm7,%xmm5 + movdqa %xmm7,96(%ebx) + pxor %xmm5,%xmm3 + movdqa %xmm3,%xmm1 + pslld $7,%xmm3 + psrld $25,%xmm1 + por %xmm1,%xmm3 + decl %edx + jnz .L010loop + movdqa %xmm3,-64(%ebx) + movdqa %xmm4,(%ebx) + movdqa %xmm5,16(%ebx) + movdqa %xmm6,64(%ebx) + movdqa %xmm7,96(%ebx) + movdqa -112(%ebx),%xmm1 + movdqa -96(%ebx),%xmm2 + movdqa -80(%ebx),%xmm3 + paddd -128(%ebp),%xmm0 + paddd -112(%ebp),%xmm1 + paddd -96(%ebp),%xmm2 + paddd -80(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqa %xmm0,-128(%ebx) + movdqa -64(%ebx),%xmm0 + movdqa %xmm1,-112(%ebx) + movdqa %xmm6,-96(%ebx) + movdqa %xmm3,-80(%ebx) + movdqa -48(%ebx),%xmm1 + movdqa -32(%ebx),%xmm2 + movdqa -16(%ebx),%xmm3 + paddd -64(%ebp),%xmm0 + paddd -48(%ebp),%xmm1 + paddd -32(%ebp),%xmm2 + paddd -16(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqa %xmm0,-64(%ebx) + movdqa (%ebx),%xmm0 + movdqa %xmm1,-48(%ebx) + movdqa %xmm6,-32(%ebx) + movdqa %xmm3,-16(%ebx) + movdqa 16(%ebx),%xmm1 + movdqa 32(%ebx),%xmm2 + movdqa 48(%ebx),%xmm3 + paddd (%ebp),%xmm0 + paddd 16(%ebp),%xmm1 + paddd 32(%ebp),%xmm2 + paddd 48(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqa %xmm0,(%ebx) + movdqa 64(%ebx),%xmm0 + movdqa %xmm1,16(%ebx) + movdqa %xmm6,32(%ebx) + movdqa %xmm3,48(%ebx) + movdqa 80(%ebx),%xmm1 + movdqa 96(%ebx),%xmm2 + movdqa 112(%ebx),%xmm3 + paddd 64(%ebp),%xmm0 + paddd 80(%ebp),%xmm1 + paddd 96(%ebp),%xmm2 + paddd 112(%ebp),%xmm3 + movdqa %xmm0,%xmm6 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm6 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm6,%xmm3 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + movdqa %xmm0,64(%ebx) + movdqa %xmm1,80(%ebx) + movdqa %xmm6,96(%ebx) + movdqa %xmm3,112(%ebx) + movdqu -128(%esi),%xmm0 + movdqu -112(%esi),%xmm1 + movdqu -96(%esi),%xmm2 + movdqu -80(%esi),%xmm3 + pxor -128(%ebx),%xmm0 + pxor -64(%ebx),%xmm1 + pxor (%ebx),%xmm2 + pxor 64(%ebx),%xmm3 + movdqu %xmm0,-128(%edi) + movdqu %xmm1,-112(%edi) + movdqu %xmm2,-96(%edi) + movdqu %xmm3,-80(%edi) + movdqu -64(%esi),%xmm0 + movdqu -48(%esi),%xmm1 + movdqu -32(%esi),%xmm2 + movdqu -16(%esi),%xmm3 + pxor -112(%ebx),%xmm0 + pxor -48(%ebx),%xmm1 + pxor 16(%ebx),%xmm2 + pxor 80(%ebx),%xmm3 + movdqu %xmm0,-64(%edi) + movdqu %xmm1,-48(%edi) + movdqu %xmm2,-32(%edi) + movdqu %xmm3,-16(%edi) + movdqu (%esi),%xmm0 + movdqu 16(%esi),%xmm1 + movdqu 32(%esi),%xmm2 + movdqu 48(%esi),%xmm3 + pxor -96(%ebx),%xmm0 + pxor -32(%ebx),%xmm1 + pxor 32(%ebx),%xmm2 + pxor 96(%ebx),%xmm3 + movdqu %xmm0,(%edi) + movdqu %xmm1,16(%edi) + movdqu %xmm2,32(%edi) + movdqu %xmm3,48(%edi) + movdqu 64(%esi),%xmm0 + movdqu 80(%esi),%xmm1 + movdqu 96(%esi),%xmm2 + movdqu 112(%esi),%xmm3 + pxor -80(%ebx),%xmm0 + pxor -16(%ebx),%xmm1 + pxor 48(%ebx),%xmm2 + pxor 112(%ebx),%xmm3 + movdqu %xmm0,64(%edi) + movdqu %xmm1,80(%edi) + movdqu %xmm2,96(%edi) + movdqu %xmm3,112(%edi) + leal 256(%esi),%esi + leal 256(%edi),%edi + subl $256,%ecx + jnc .L009outer_loop + addl $256,%ecx + jz .L011done + movl 520(%esp),%ebx + leal -128(%esi),%esi + movl 516(%esp),%edx + leal -128(%edi),%edi + movd 64(%ebp),%xmm2 + movdqu (%ebx),%xmm3 + paddd 96(%eax),%xmm2 + pand 112(%eax),%xmm3 + por %xmm2,%xmm3 +.L0081x: + movdqa 32(%eax),%xmm0 + movdqu (%edx),%xmm1 + movdqu 16(%edx),%xmm2 + movdqa (%eax),%xmm6 + movdqa 16(%eax),%xmm7 + movl %ebp,48(%esp) + movdqa %xmm0,(%esp) + movdqa %xmm1,16(%esp) + movdqa %xmm2,32(%esp) + movdqa %xmm3,48(%esp) + movl $10,%edx + jmp .L012loop1x +.align 16 +.L013outer1x: + movdqa 80(%eax),%xmm3 + movdqa (%esp),%xmm0 + movdqa 16(%esp),%xmm1 + movdqa 32(%esp),%xmm2 + paddd 48(%esp),%xmm3 + movl $10,%edx + movdqa %xmm3,48(%esp) + jmp .L012loop1x +.align 16 +.L012loop1x: + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $57,%xmm1,%xmm1 + pshufd $147,%xmm3,%xmm3 + nop + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $147,%xmm1,%xmm1 + pshufd $57,%xmm3,%xmm3 + decl %edx + jnz .L012loop1x + paddd (%esp),%xmm0 + paddd 16(%esp),%xmm1 + paddd 32(%esp),%xmm2 + paddd 48(%esp),%xmm3 + cmpl $64,%ecx + jb .L014tail + movdqu (%esi),%xmm4 + movdqu 16(%esi),%xmm5 + pxor %xmm4,%xmm0 + movdqu 32(%esi),%xmm4 + pxor %xmm5,%xmm1 + movdqu 48(%esi),%xmm5 + pxor %xmm4,%xmm2 + pxor %xmm5,%xmm3 + leal 64(%esi),%esi + movdqu %xmm0,(%edi) + movdqu %xmm1,16(%edi) + movdqu %xmm2,32(%edi) + movdqu %xmm3,48(%edi) + leal 64(%edi),%edi + subl $64,%ecx + jnz .L013outer1x + jmp .L011done +.L014tail: + movdqa %xmm0,(%esp) + movdqa %xmm1,16(%esp) + movdqa %xmm2,32(%esp) + movdqa %xmm3,48(%esp) + xorl %eax,%eax + xorl %edx,%edx + xorl %ebp,%ebp +.L015tail_loop: + movb (%esp,%ebp,1),%al + movb (%esi,%ebp,1),%dl + leal 1(%ebp),%ebp + xorb %dl,%al + movb %al,-1(%edi,%ebp,1) + decl %ecx + jnz .L015tail_loop +.L011done: + movl 512(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin +.align 64 +.Lssse3_data: +.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 +.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 +.long 1634760805,857760878,2036477234,1797285236 +.long 0,1,2,3 +.long 4,4,4,4 +.long 1,0,0,0 +.long 4,0,0,0 +.long 0,-1,-1,-1 +.align 64 +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 +.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +.byte 114,103,62,0 +#endif diff --git a/linux-x86/crypto/rc4/rc4-586.S b/linux-x86/crypto/rc4/rc4-586.S index a5cce47c..d245589e 100644 --- a/linux-x86/crypto/rc4/rc4-586.S +++ b/linux-x86/crypto/rc4/rc4-586.S @@ -347,39 +347,4 @@ asm_RC4_set_key: popl %ebp ret .size asm_RC4_set_key,.-.L_asm_RC4_set_key_begin -.globl RC4_options -.hidden RC4_options -.type RC4_options,@function -.align 16 -RC4_options: -.L_RC4_options_begin: - call .L018pic_point -.L018pic_point: - popl %eax - leal .L019opts-.L018pic_point(%eax),%eax - call .L020PIC_me_up -.L020PIC_me_up: - popl %edx - leal OPENSSL_ia32cap_P-.L020PIC_me_up(%edx),%edx - movl (%edx),%edx - btl $20,%edx - jc .L0211xchar - btl $26,%edx - jnc .L022ret - addl $25,%eax - ret -.L0211xchar: - addl $12,%eax -.L022ret: - ret -.align 64 -.L019opts: -.byte 114,99,52,40,52,120,44,105,110,116,41,0 -.byte 114,99,52,40,49,120,44,99,104,97,114,41,0 -.byte 114,99,52,40,56,120,44,109,109,120,41,0 -.byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 -.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 -.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.align 64 -.size RC4_options,.-.L_RC4_options_begin #endif |