diff options
Diffstat (limited to 'linux-x86_64/crypto/chacha/chacha-x86_64.S')
-rw-r--r-- | linux-x86_64/crypto/chacha/chacha-x86_64.S | 73 |
1 files changed, 37 insertions, 36 deletions
diff --git a/linux-x86_64/crypto/chacha/chacha-x86_64.S b/linux-x86_64/crypto/chacha/chacha-x86_64.S index e994940a..25ec715f 100644 --- a/linux-x86_64/crypto/chacha/chacha-x86_64.S +++ b/linux-x86_64/crypto/chacha/chacha-x86_64.S @@ -23,6 +23,15 @@ .byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe .Lsigma: .byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 +.align 64 +.Lzeroz: +.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0 +.Lfourz: +.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0 +.Lincz: +.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 +.Lsixteen: +.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .globl ChaCha20_ctr32 .hidden ChaCha20_ctr32 @@ -42,6 +51,7 @@ ChaCha20_ctr32: pushq %r14 pushq %r15 subq $64+24,%rsp +.Lctr32_body: movdqu (%rcx),%xmm1 @@ -279,13 +289,14 @@ ChaCha20_ctr32: jnz .Loop_tail .Ldone: - addq $64+24,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbp - popq %rbx + leaq 64+24+48(%rsp),%rsi + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp .Lno_data: .byte 0xf3,0xc3 .size ChaCha20_ctr32,.-ChaCha20_ctr32 @@ -293,18 +304,12 @@ ChaCha20_ctr32: .align 32 ChaCha20_ssse3: .LChaCha20_ssse3: + movq %rsp,%r9 cmpq $128,%rdx ja .LChaCha20_4x .Ldo_sse3_after_all: - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - - subq $64+24,%rsp + subq $64+8,%rsp movdqa .Lsigma(%rip),%xmm0 movdqu (%rcx),%xmm1 movdqu 16(%rcx),%xmm2 @@ -316,7 +321,7 @@ ChaCha20_ssse3: movdqa %xmm1,16(%rsp) movdqa %xmm2,32(%rsp) movdqa %xmm3,48(%rsp) - movl $10,%ebp + movq $10,%r8 jmp .Loop_ssse3 .align 32 @@ -326,7 +331,7 @@ ChaCha20_ssse3: movdqa 16(%rsp),%xmm1 movdqa 32(%rsp),%xmm2 paddd 48(%rsp),%xmm3 - movl $10,%ebp + movq $10,%r8 movdqa %xmm3,48(%rsp) jmp .Loop_ssse3 @@ -375,7 +380,7 @@ ChaCha20_ssse3: pshufd $78,%xmm2,%xmm2 pshufd $147,%xmm1,%xmm1 pshufd $57,%xmm3,%xmm3 - decl %ebp + decq %r8 jnz .Loop_ssse3 paddd 0(%rsp),%xmm0 paddd 16(%rsp),%xmm1 @@ -412,31 +417,27 @@ ChaCha20_ssse3: movdqa %xmm1,16(%rsp) movdqa %xmm2,32(%rsp) movdqa %xmm3,48(%rsp) - xorq %rbx,%rbx + xorq %r8,%r8 .Loop_tail_ssse3: - movzbl (%rsi,%rbx,1),%eax - movzbl (%rsp,%rbx,1),%ecx - leaq 1(%rbx),%rbx + movzbl (%rsi,%r8,1),%eax + movzbl (%rsp,%r8,1),%ecx + leaq 1(%r8),%r8 xorl %ecx,%eax - movb %al,-1(%rdi,%rbx,1) + movb %al,-1(%rdi,%r8,1) decq %rdx jnz .Loop_tail_ssse3 .Ldone_ssse3: - addq $64+24,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbp - popq %rbx + leaq (%r9),%rsp +.Lssse3_epilogue: .byte 0xf3,0xc3 .size ChaCha20_ssse3,.-ChaCha20_ssse3 .type ChaCha20_4x,@function .align 32 ChaCha20_4x: .LChaCha20_4x: + movq %rsp,%r9 movq %r10,%r11 shrq $32,%r10 testq $32,%r10 @@ -449,8 +450,7 @@ ChaCha20_4x: je .Ldo_sse3_after_all .Lproceed4x: - leaq -120(%rsp),%r11 - subq $0x148+0,%rsp + subq $0x140+8,%rsp movdqa .Lsigma(%rip),%xmm11 movdqu (%rcx),%xmm15 movdqu 16(%rcx),%xmm7 @@ -977,18 +977,18 @@ ChaCha20_4x: jnz .Loop_tail4x .Ldone4x: - addq $0x148+0,%rsp + leaq (%r9),%rsp +.L4x_epilogue: .byte 0xf3,0xc3 .size ChaCha20_4x,.-ChaCha20_4x .type ChaCha20_8x,@function .align 32 ChaCha20_8x: .LChaCha20_8x: - movq %rsp,%r10 + movq %rsp,%r9 subq $0x280+8,%rsp andq $-32,%rsp vzeroupper - movq %r10,640(%rsp) @@ -1579,7 +1579,8 @@ ChaCha20_8x: .Ldone8x: vzeroall - movq 640(%rsp),%rsp + leaq (%r9),%rsp +.L8x_epilogue: .byte 0xf3,0xc3 .size ChaCha20_8x,.-ChaCha20_8x #endif |