summary refs log tree commit diff
path: root/mac-x86_64/crypto/chacha/chacha-x86_64.S
diff options
context:
space:
mode:
Diffstat (limited to 'mac-x86_64/crypto/chacha/chacha-x86_64.S')
-rw-r--r-- mac-x86_64/crypto/chacha/chacha-x86_64.S 73
1 files changed, 37 insertions, 36 deletions
diff --git a/mac-x86_64/crypto/chacha/chacha-x86_64.S b/mac-x86_64/crypto/chacha/chacha-x86_64.S
index c3554c8d..51c0caa7 100644
--- a/mac-x86_64/crypto/chacha/chacha-x86_64.S
+++ b/mac-x86_64/crypto/chacha/chacha-x86_64.S
@@ -22,6 +22,15 @@ L$rot24:
.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe
L$sigma:
.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0
+.p2align 6
+L$zeroz:
+.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0
+L$fourz:
+.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0
+L$incz:
+.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+L$sixteen:
+.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.globl _ChaCha20_ctr32
.private_extern _ChaCha20_ctr32
@@ -41,6 +50,7 @@ _ChaCha20_ctr32:
pushq %r14
pushq %r15
subq $64+24,%rsp
+L$ctr32_body:
movdqu (%rcx),%xmm1
@@ -278,13 +288,14 @@ L$oop_tail:
jnz L$oop_tail
L$done:
- addq $64+24,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbp
- popq %rbx
+ leaq 64+24+48(%rsp),%rsi
+ movq -48(%rsi),%r15
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
L$no_data:
.byte 0xf3,0xc3
@@ -292,18 +303,12 @@ L$no_data:
.p2align 5
ChaCha20_ssse3:
L$ChaCha20_ssse3:
+ movq %rsp,%r9
cmpq $128,%rdx
ja L$ChaCha20_4x
L$do_sse3_after_all:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
-
- subq $64+24,%rsp
+ subq $64+8,%rsp
movdqa L$sigma(%rip),%xmm0
movdqu (%rcx),%xmm1
movdqu 16(%rcx),%xmm2
@@ -315,7 +320,7 @@ L$do_sse3_after_all:
movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp)
- movl $10,%ebp
+ movq $10,%r8
jmp L$oop_ssse3
.p2align 5
@@ -325,7 +330,7 @@ L$oop_outer_ssse3:
movdqa 16(%rsp),%xmm1
movdqa 32(%rsp),%xmm2
paddd 48(%rsp),%xmm3
- movl $10,%ebp
+ movq $10,%r8
movdqa %xmm3,48(%rsp)
jmp L$oop_ssse3
@@ -374,7 +379,7 @@ L$oop_ssse3:
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
- decl %ebp
+ decq %r8
jnz L$oop_ssse3
paddd 0(%rsp),%xmm0
paddd 16(%rsp),%xmm1
@@ -411,31 +416,27 @@ L$tail_ssse3:
movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp)
- xorq %rbx,%rbx
+ xorq %r8,%r8
L$oop_tail_ssse3:
- movzbl (%rsi,%rbx,1),%eax
- movzbl (%rsp,%rbx,1),%ecx
- leaq 1(%rbx),%rbx
+ movzbl (%rsi,%r8,1),%eax
+ movzbl (%rsp,%r8,1),%ecx
+ leaq 1(%r8),%r8
xorl %ecx,%eax
- movb %al,-1(%rdi,%rbx,1)
+ movb %al,-1(%rdi,%r8,1)
decq %rdx
jnz L$oop_tail_ssse3
L$done_ssse3:
- addq $64+24,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbp
- popq %rbx
+ leaq (%r9),%rsp
+L$ssse3_epilogue:
.byte 0xf3,0xc3
.p2align 5
ChaCha20_4x:
L$ChaCha20_4x:
+ movq %rsp,%r9
movq %r10,%r11
shrq $32,%r10
testq $32,%r10
@@ -448,8 +449,7 @@ L$ChaCha20_4x:
je L$do_sse3_after_all
L$proceed4x:
- leaq -120(%rsp),%r11
- subq $0x148+0,%rsp
+ subq $0x140+8,%rsp
movdqa L$sigma(%rip),%xmm11
movdqu (%rcx),%xmm15
movdqu 16(%rcx),%xmm7
@@ -976,18 +976,18 @@ L$oop_tail4x:
jnz L$oop_tail4x
L$done4x:
- addq $0x148+0,%rsp
+ leaq (%r9),%rsp
+L$4x_epilogue:
.byte 0xf3,0xc3
.p2align 5
ChaCha20_8x:
L$ChaCha20_8x:
- movq %rsp,%r10
+ movq %rsp,%r9
subq $0x280+8,%rsp
andq $-32,%rsp
vzeroupper
- movq %r10,640(%rsp)
@@ -1578,7 +1578,8 @@ L$oop_tail8x:
L$done8x:
vzeroall
- movq 640(%rsp),%rsp
+ leaq (%r9),%rsp
+L$8x_epilogue:
.byte 0xf3,0xc3
#endif