Diffstat (limited to 'linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S')
-rw-r--r-- | linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S | 198
1 file changed, 192 insertions, 6 deletions
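This looks like BoringSSL's change teaching the SSE sealing path about an `extra_in` buffer: a pointer at offset 48 and a length at offset 56 of the sixth argument (reloaded from `288+32(%rsp)`) are absorbed into the Poly1305 tag after the ciphertext, the `.and_masks` table gains a sixteenth, all-ones row so a full 16-byte mask can be selected after `shlq $4,%rbx`, and the length saved for the final length block becomes the plaintext length plus the extra length. A minimal C sketch of the flow the new labels implement, assuming a zero-padding `poly1305_update` and the struct layout implied by the offsets (every name here is illustrative, not BoringSSL's API):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical interface standing in for the inlined Poly1305 code;
 * assumed to zero-pad a final partial block, as the AEAD requires. */
typedef struct poly1305_state poly1305_state;
void poly1305_update(poly1305_state *st, const uint8_t *in, size_t len);

/* Assumed layout behind %r9: only the two fields the new code reads
 * (48(%r9) and 56(%r9)) are known from the diff; the rest is a guess. */
struct seal_data {
  uint8_t key[32];
  uint32_t counter;
  uint8_t nonce[12];
  const uint8_t *extra_in;  /* 48(%r9) */
  size_t extra_in_len;      /* 56(%r9) */
};

static void store_le64(uint8_t out[8], uint64_t v) {
  for (int i = 0; i < 8; i++) out[i] = (uint8_t)(v >> (8 * i));
}

/* Absorption order implemented by the patched tail code:
 * 1. seal_sse_tail_16/load_extra_in: top up the final partial
 *    ciphertext block with leading extra_in bytes (one shared block);
 * 2. process_blocks_of_extra_in + process_extra_in_trailer: hash the
 *    rest of extra_in;
 * 3. do_length_block: hash ad_len and in_len + extra_in_len. */
static void absorb_tail(poly1305_state *st, const uint8_t *ct_tail,
                        size_t tail_len /* < 16 */,
                        const struct seal_data *d,
                        size_t ad_len, size_t in_len) {
  uint8_t block[16];
  size_t take = 16 - tail_len;          /* movq $16,%r15; subq %rbx,%r15 */
  if (take > d->extra_in_len) {
    take = d->extra_in_len;             /* movq %r14,%r15 */
  }
  memcpy(block, ct_tail, tail_len);
  memcpy(block + tail_len, d->extra_in, take);
  if (tail_len + take > 0) {
    poly1305_update(st, block, tail_len + take);
  }
  poly1305_update(st, d->extra_in + take, d->extra_in_len - take);

  uint8_t lens[16];
  store_le64(lens, ad_len);                        /* 0+32(%rbp) */
  store_le64(lens + 8, in_len + d->extra_in_len);  /* 8+32(%rbp) */
  poly1305_update(st, lens, 16);
}

The assembly builds the shared block in registers instead of memory, loading bytes back to front with `pinsrb`/`pslldq`, masking the ciphertext tail with the `.and_masks` row, and merging in the extra bytes with `por`.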
diff --git a/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S b/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
index d149d0f7..a6f5e07d 100644
--- a/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
+++ b/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
@@ -41,6 +41,7 @@ chacha20_poly1305_constants:
 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
+.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
 
 .type poly_hash_ad_internal,@function
 .align 64
@@ -2124,7 +2125,9 @@ chacha20_poly1305_seal:
 .cfi_offset r15, -56
 	leaq 32(%rsp),%rbp
 	andq $-32,%rbp
-	movq %rdx,8+32(%rbp)
+	movq 56(%r9),%rbx
+	addq %rdx,%rbx
+	movq %rbx,8+32(%rbp)
 	movq %r8,0+32(%rbp)
 	movq %rdx,%rbx
 
@@ -3587,11 +3590,9 @@ seal_sse_128_seal:
 
 seal_sse_tail_16:
 	testq %rbx,%rbx
-	jz seal_sse_finalize
+	jz process_blocks_of_extra_in
 	movq %rbx,%r8
-	shlq $4,%r8
-	leaq .and_masks(%rip),%r13
 	movq %rbx,%rcx
 	leaq -1(%rsi,%rbx), %rsi
 	pxor %xmm15,%xmm15
 
@@ -3615,7 +3616,72 @@ seal_sse_tail_16:
 	subq $1,%rcx
 	jnz 2b
 
-	pand -16(%r13,%r8), %xmm15
+
+
+
+
+
+
+
+	movq 288+32(%rsp),%r9
+	movq 56(%r9),%r14
+	movq 48(%r9),%r13
+	testq %r14,%r14
+	jz process_partial_block
+
+	movq $16,%r15
+	subq %rbx,%r15
+	cmpq %r15,%r14
+
+	jge load_extra_in
+	movq %r14,%r15
+
+load_extra_in:
+
+
+	leaq -1(%r13,%r15), %rsi
+
+
+	addq %r15,%r13
+	subq %r15,%r14
+	movq %r13,48(%r9)
+	movq %r14,56(%r9)
+
+
+
+	addq %r15,%r8
+
+
+	pxor %xmm11,%xmm11
+3:
+	pslldq $1,%xmm11
+	pinsrb $0,(%rsi),%xmm11
+	leaq -1(%rsi),%rsi
+	subq $1,%r15
+	jnz 3b
+
+
+
+	movq %rbx,%r15
+
+4:
+	pslldq $1,%xmm11
+	subq $1,%r15
+	jnz 4b
+
+
+
+	leaq .and_masks(%rip),%r15
+	shlq $4,%rbx
+	pand -16(%r15,%rbx), %xmm15
+
+
+	por %xmm11,%xmm15
+
+
+
 .byte 102,77,15,126,253
 	pextrq $1,%xmm15,%r14
 	addq %r13,%r10
@@ -3660,7 +3726,127 @@ seal_sse_tail_16:
 	adcq %r9,%r11
 	adcq $0,%r12
 
-seal_sse_finalize:
+
+process_blocks_of_extra_in:
+
+	movq 288+32(%rsp),%r9
+	movq 48(%r9),%rsi
+	movq 56(%r9),%r8
+	movq %r8,%rcx
+	shrq $4,%r8
+
+5:
+	jz process_extra_in_trailer
+	addq 0(%rsi),%r10
+	adcq 8+0(%rsi),%r11
+	adcq $1,%r12
+	movq 0+0(%rbp),%rax
+	movq %rax,%r15
+	mulq %r10
+	movq %rax,%r13
+	movq %rdx,%r14
+	movq 0+0(%rbp),%rax
+	mulq %r11
+	imulq %r12,%r15
+	addq %rax,%r14
+	adcq %rdx,%r15
+	movq 8+0(%rbp),%rax
+	movq %rax,%r9
+	mulq %r10
+	addq %rax,%r14
+	adcq $0,%rdx
+	movq %rdx,%r10
+	movq 8+0(%rbp),%rax
+	mulq %r11
+	addq %rax,%r15
+	adcq $0,%rdx
+	imulq %r12,%r9
+	addq %r10,%r15
+	adcq %rdx,%r9
+	movq %r13,%r10
+	movq %r14,%r11
+	movq %r15,%r12
+	andq $3,%r12
+	movq %r15,%r13
+	andq $-4,%r13
+	movq %r9,%r14
+	shrdq $2,%r9,%r15
+	shrq $2,%r9
+	addq %r13,%r10
+	adcq %r14,%r11
+	adcq $0,%r12
+	addq %r15,%r10
+	adcq %r9,%r11
+	adcq $0,%r12
+
+	leaq 16(%rsi),%rsi
+	subq $1,%r8
+	jmp 5b
+
+process_extra_in_trailer:
+	andq $15,%rcx
+	movq %rcx,%rbx
+	jz do_length_block
+	leaq -1(%rsi,%rcx), %rsi
+
+6:
+	pslldq $1,%xmm15
+	pinsrb $0,(%rsi),%xmm15
+	leaq -1(%rsi),%rsi
+	subq $1,%rcx
+	jnz 6b
+
+process_partial_block:
+
+	leaq .and_masks(%rip),%r15
+	shlq $4,%rbx
+	pand -16(%r15,%rbx), %xmm15
+.byte 102,77,15,126,253
+	pextrq $1,%xmm15,%r14
+	addq %r13,%r10
+	adcq %r14,%r11
+	adcq $1,%r12
+	movq 0+0(%rbp),%rax
+	movq %rax,%r15
+	mulq %r10
+	movq %rax,%r13
+	movq %rdx,%r14
+	movq 0+0(%rbp),%rax
+	mulq %r11
+	imulq %r12,%r15
+	addq %rax,%r14
+	adcq %rdx,%r15
+	movq 8+0(%rbp),%rax
+	movq %rax,%r9
+	mulq %r10
+	addq %rax,%r14
+	adcq $0,%rdx
+	movq %rdx,%r10
+	movq 8+0(%rbp),%rax
+	mulq %r11
+	addq %rax,%r15
+	adcq $0,%rdx
+	imulq %r12,%r9
+	addq %r10,%r15
+	adcq %rdx,%r9
+	movq %r13,%r10
+	movq %r14,%r11
+	movq %r15,%r12
+	andq $3,%r12
+	movq %r15,%r13
+	andq $-4,%r13
+	movq %r9,%r14
+	shrdq $2,%r9,%r15
+	shrq $2,%r9
+	addq %r13,%r10
+	adcq %r14,%r11
+	adcq $0,%r12
+	addq %r15,%r10
+	adcq %r9,%r11
+	adcq $0,%r12
+
+
+do_length_block:
 	addq 32(%rbp),%r10
 	adcq 8+32(%rbp),%r11
 	adcq $1,%r12
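The long multiply sequences (repeated in the `5:` loop and at `process_partial_block`) are single Poly1305 block steps on the accumulator in `%r12:%r11:%r10`: add the 16-byte block plus a high 1 bit, multiply by the clamped key limbs at `0(%rbp)` and `8(%rbp)`, and partially reduce modulo 2^130 - 5 with the `andq $3`/`andq $-4`/`shrdq $2` trick. A sketch of one such step in C, assuming a little-endian host, `unsigned __int128` support, and key limbs r0/r1 that are already clamped (an illustration of the math, not the generated code):

#include <stdint.h>
#include <string.h>

typedef unsigned __int128 u128;

/* 130-bit accumulator h = h2*2^128 + h1*2^64 + h0 (%r12:%r11:%r10). */
struct poly1305_acc { uint64_t h0, h1, h2; };

static uint64_t load_le64(const uint8_t *p) {
  uint64_t v;
  memcpy(&v, p, 8);  /* little-endian host assumed, as on x86-64 */
  return v;
}

/* One block step: h = (h + m + 2^128) * r mod 2^130 - 5, partially
 * reduced.  r0/r1 must be clamped so no product overflows 128 bits. */
static void poly1305_block(struct poly1305_acc *h, const uint8_t m[16],
                           uint64_t r0, uint64_t r1) {
  /* addq 0(%rsi),%r10; adcq 8+0(%rsi),%r11; adcq $1,%r12 */
  u128 t = (u128)h->h0 + load_le64(m);
  h->h0 = (uint64_t)t;
  t = (u128)h->h1 + load_le64(m + 8) + (t >> 64);
  h->h1 = (uint64_t)t;
  h->h2 += 1 + (uint64_t)(t >> 64);

  /* Schoolbook multiply by r = r1*2^64 + r0 (the mulq/imulq block);
   * d3:d2:d1:d0 correspond to %r9:%r15:%r14:%r13. */
  u128 d0 = (u128)h->h0 * r0;
  u128 d1 = (u128)h->h1 * r0 + (u128)h->h0 * r1 + (d0 >> 64);
  u128 d2 = (u128)h->h2 * r0 + (u128)h->h1 * r1 + (d1 >> 64);
  uint64_t d3 = h->h2 * r1 + (uint64_t)(d2 >> 64);

  /* Partial reduction: with c = the bits at 2^130 and up, 2^130 = 5
   * mod p, so h = low130 + 4c + c.  4c is (d3, d2 & ~3); c is what
   * shrdq $2,%r9,%r15 and shrq $2,%r9 compute in the assembly. */
  uint64_t c0 = (uint64_t)d2 & ~(uint64_t)3;
  uint64_t c1 = d3;
  t = (u128)(uint64_t)d0 + c0 + ((c0 >> 2) | (c1 << 62));
  h->h0 = (uint64_t)t;
  t = (u128)(uint64_t)d1 + c1 + (c1 >> 2) + (t >> 64);
  h->h1 = (uint64_t)t;
  h->h2 = ((uint64_t)d2 & 3) + (uint64_t)(t >> 64);
}

The assembly folds the carry limbs with two separate add/adc chains rather than 128-bit arithmetic, but the resulting accumulator is the same partially reduced value; `do_length_block` then runs one final such step over the AD/ciphertext length block.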