#if defined(__x86_64__)
// ChaCha20-Poly1305 AEAD, x86-64 SSE/AVX2 implementation.
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

.align 64
// ChaCha20 block constant: "expand 32-byte k".
.chacha20_consts:
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
// pshufb masks implementing 8-bit and 16-bit left rotations of each dword.
.rol8:
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.rol16:
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
// Block-counter increment vectors for the SSE and AVX2 paths.
.avx2_init:
.long 0,0,0,0
.sse_inc:
.long 1,0,0,0
.avx2_inc:
.long 2,0,0,0,2,0,0,0
// Poly1305 'r' clamping mask.
.clamp:
.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.align 16
// Byte masks used to zero the unused tail of a partial (1..15-byte) block.
.and_masks:
.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00

// poly_hash_ad_internal: absorb the additional data (pointer in %rcx, length
// in %r8) into the Poly1305 accumulator r10:r11:r12, using the clamped key
// halves stored at 0(%rbp) and 8(%rbp).
.type poly_hash_ad_internal,@function
.align 64
poly_hash_ad_internal:
.cfi_startproc
xorq %r10,%r10
xorq %r11,%r11
xorq %r12,%r12
cmpq $13,%r8
jne hash_ad_loop
// Fast path for the common 13-byte TLS AAD.
poly_fast_tls_ad:
movq (%rcx),%r10
movq 5(%rcx),%r11
shrq $24,%r11
movq $1,%r12
// acc = (acc * r) mod 2^130-5, partially reduced.
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imul %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imul %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
// ret
.byte 0xf3,0xc3
// Absorb full 16-byte AAD blocks.
hash_ad_loop:
cmpq $16,%r8
jb hash_ad_tail
addq 0(%rcx),%r10
adcq 8+0(%rcx),%r11
adcq $1,%r12
movq 0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0(%rbp),%rax
mulq %r11
imul %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imul %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r10
adcq %r14,%r11
adcq $0,%r12
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq (1*16)(%rcx),%rcx
subq $16,%r8
jmp hash_ad_loop
// Fewer than 16 AAD bytes remain.
hash_ad_tail:
cmpq $0,%r8
je 1f
xorq %r13,%r13
xorq %r14,%r14
xorq %r15,%r15
addq %r8,%rcx
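// hash_ad_tail_loop: shift the last 1..15 AAD bytes into r14:r13, reading
// backwards from the end of the data, then absorb them as one final block.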
hash_ad_tail_loop: shldq $8,%r13,%r14 shlq $8,%r13 movzbq -1(%rcx),%r15 xorq %r15,%r13 decq %rcx decq %r8 jne hash_ad_tail_loop addq %r13,%r10 adcq %r14,%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 1: .byte 0xf3,0xc3 .cfi_endproc .size poly_hash_ad_internal, .-poly_hash_ad_internal .globl chacha20_poly1305_open .hidden chacha20_poly1305_open .type chacha20_poly1305_open,@function .align 64 chacha20_poly1305_open: .cfi_startproc pushq %rbp .cfi_adjust_cfa_offset 8 pushq %rbx .cfi_adjust_cfa_offset 8 pushq %r12 .cfi_adjust_cfa_offset 8 pushq %r13 .cfi_adjust_cfa_offset 8 pushq %r14 .cfi_adjust_cfa_offset 8 pushq %r15 .cfi_adjust_cfa_offset 8 pushq %r9 .cfi_adjust_cfa_offset 8 subq $288 + 32,%rsp .cfi_adjust_cfa_offset 288 + 32 .cfi_offset rbp, -16 .cfi_offset rbx, -24 .cfi_offset r12, -32 .cfi_offset r13, -40 .cfi_offset r14, -48 .cfi_offset r15, -56 .cfi_offset %r9, -64 leaq 32(%rsp),%rbp andq $-32,%rbp movq %rdx,8+32(%rbp) movq %r8,0+32(%rbp) movq %rdx,%rbx movl OPENSSL_ia32cap_P+8(%rip),%eax andl $288,%eax xorl $288,%eax jz chacha20_poly1305_open_avx2 1: cmpq $128,%rbx jbe open_sse_128 movdqa .chacha20_consts(%rip),%xmm0 movdqu 0(%r9),%xmm4 movdqu 16(%r9),%xmm8 movdqu 32(%r9),%xmm12 movdqa %xmm12,%xmm7 movdqa %xmm4,48(%rbp) movdqa %xmm8,64(%rbp) movdqa %xmm12,96(%rbp) movq $10,%r10 1: paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 decq %r10 jne 1b paddd .chacha20_consts(%rip),%xmm0 paddd 48(%rbp),%xmm4 pand .clamp(%rip),%xmm0 movdqa %xmm0,0(%rbp) movdqa %xmm4,16(%rbp) movq %r8,%r8 call poly_hash_ad_internal open_sse_main_loop: cmpq $256,%rbx jb 2f movdqa .chacha20_consts(%rip),%xmm0 movdqa 48(%rbp),%xmm4 movdqa 64(%rbp),%xmm8 movdqa %xmm0,%xmm1 movdqa %xmm4,%xmm5 movdqa %xmm8,%xmm9 movdqa %xmm0,%xmm2 movdqa %xmm4,%xmm6 movdqa %xmm8,%xmm10 movdqa %xmm0,%xmm3 movdqa %xmm4,%xmm7 movdqa %xmm8,%xmm11 movdqa 96(%rbp),%xmm15 paddd .sse_inc(%rip),%xmm15 movdqa %xmm15,%xmm14 paddd .sse_inc(%rip),%xmm14 movdqa %xmm14,%xmm13 paddd .sse_inc(%rip),%xmm13 movdqa %xmm13,%xmm12 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,96(%rbp) movdqa %xmm13,112(%rbp) movdqa %xmm14,128(%rbp) movdqa %xmm15,144(%rbp) movq $4,%rcx movq %rsi,%r8 1: movdqa %xmm8,80(%rbp) movdqa .rol16(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd 
%xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd %xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 addq 0(%r8),%r10 adcq 8+0(%r8),%r11 adcq $1,%r12 leaq 16(%r8),%r8 pxor %xmm10,%xmm6 pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm4 pxor %xmm8,%xmm4 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movdqa .rol8(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd %xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd %xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 pxor %xmm10,%xmm6 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm4 pxor %xmm8,%xmm4 movdqa 80(%rbp),%xmm8 imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 .byte 102,15,58,15,255,4 .byte 102,69,15,58,15,219,8 .byte 102,69,15,58,15,255,12 .byte 102,15,58,15,246,4 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,12 .byte 102,15,58,15,237,4 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,12 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 movdqa %xmm8,80(%rbp) movdqa .rol16(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd %xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd %xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 pxor %xmm10,%xmm6 pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm4 pxor %xmm8,%xmm4 movdqa .rol8(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd %xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd %xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 pxor 
%xmm10,%xmm6 pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm4 pxor %xmm8,%xmm4 movdqa 80(%rbp),%xmm8 .byte 102,15,58,15,255,12 .byte 102,69,15,58,15,219,8 .byte 102,69,15,58,15,255,4 .byte 102,15,58,15,246,12 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,4 .byte 102,15,58,15,237,12 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 decq %rcx jge 1b addq 0(%r8),%r10 adcq 8+0(%r8),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%r8),%r8 cmpq $-6,%rcx jg 1b paddd .chacha20_consts(%rip),%xmm3 paddd 48(%rbp),%xmm7 paddd 64(%rbp),%xmm11 paddd 144(%rbp),%xmm15 paddd .chacha20_consts(%rip),%xmm2 paddd 48(%rbp),%xmm6 paddd 64(%rbp),%xmm10 paddd 128(%rbp),%xmm14 paddd .chacha20_consts(%rip),%xmm1 paddd 48(%rbp),%xmm5 paddd 64(%rbp),%xmm9 paddd 112(%rbp),%xmm13 paddd .chacha20_consts(%rip),%xmm0 paddd 48(%rbp),%xmm4 paddd 64(%rbp),%xmm8 paddd 96(%rbp),%xmm12 movdqa %xmm12,80(%rbp) movdqu 0 + 0(%rsi),%xmm12 pxor %xmm3,%xmm12 movdqu %xmm12,0 + 0(%rdi) movdqu 16 + 0(%rsi),%xmm12 pxor %xmm7,%xmm12 movdqu %xmm12,16 + 0(%rdi) movdqu 32 + 0(%rsi),%xmm12 pxor %xmm11,%xmm12 movdqu %xmm12,32 + 0(%rdi) movdqu 48 + 0(%rsi),%xmm12 pxor %xmm15,%xmm12 movdqu %xmm12,48 + 0(%rdi) movdqu 0 + 64(%rsi),%xmm3 movdqu 16 + 64(%rsi),%xmm7 movdqu 32 + 64(%rsi),%xmm11 movdqu 48 + 64(%rsi),%xmm15 pxor %xmm3,%xmm2 pxor %xmm7,%xmm6 pxor %xmm11,%xmm10 pxor %xmm14,%xmm15 movdqu %xmm2,0 + 64(%rdi) movdqu %xmm6,16 + 64(%rdi) movdqu %xmm10,32 + 64(%rdi) movdqu %xmm15,48 + 64(%rdi) movdqu 0 + 128(%rsi),%xmm3 movdqu 16 + 128(%rsi),%xmm7 movdqu 32 + 128(%rsi),%xmm11 movdqu 48 + 128(%rsi),%xmm15 pxor %xmm3,%xmm1 pxor %xmm7,%xmm5 pxor %xmm11,%xmm9 pxor %xmm13,%xmm15 movdqu %xmm1,0 + 128(%rdi) movdqu %xmm5,16 + 128(%rdi) movdqu %xmm9,32 + 128(%rdi) movdqu %xmm15,48 + 128(%rdi) movdqu 0 + 192(%rsi),%xmm3 movdqu 16 + 192(%rsi),%xmm7 movdqu 32 + 192(%rsi),%xmm11 movdqu 48 + 192(%rsi),%xmm15 pxor %xmm3,%xmm0 pxor %xmm7,%xmm4 pxor %xmm11,%xmm8 pxor 80(%rbp),%xmm15 movdqu %xmm0,0 + 192(%rdi) movdqu %xmm4,16 + 192(%rdi) movdqu %xmm8,32 + 192(%rdi) movdqu %xmm15,48 + 192(%rdi) leaq 256(%rsi),%rsi leaq 256(%rdi),%rdi subq $256,%rbx jmp open_sse_main_loop 2: testq %rbx,%rbx jz open_sse_finalize cmpq $64,%rbx ja 3f movdqa .chacha20_consts(%rip),%xmm0 movdqa 48(%rbp),%xmm4 movdqa 64(%rbp),%xmm8 movdqa 96(%rbp),%xmm12 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,96(%rbp) xorq %r8,%r8 movq %rbx,%rcx cmpq $16,%rcx jb 2f 1: addq 0(%rsi,%r8), %r10 adcq 8+0(%rsi,%r8), %r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 
addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 subq $16,%rcx 2: addq $16,%r8 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 cmpq $16,%rcx jae 1b cmpq $160,%r8 jne 2b paddd .chacha20_consts(%rip),%xmm0 paddd 48(%rbp),%xmm4 paddd 64(%rbp),%xmm8 paddd 96(%rbp),%xmm12 jmp open_sse_tail_64_dec_loop 3: cmpq $128,%rbx ja 3f movdqa .chacha20_consts(%rip),%xmm0 movdqa 48(%rbp),%xmm4 movdqa 64(%rbp),%xmm8 movdqa %xmm0,%xmm1 movdqa %xmm4,%xmm5 movdqa %xmm8,%xmm9 movdqa 96(%rbp),%xmm13 paddd .sse_inc(%rip),%xmm13 movdqa %xmm13,%xmm12 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,96(%rbp) movdqa %xmm13,112(%rbp) movq %rbx,%rcx andq $-16,%rcx xorq %r8,%r8 1: addq 0(%rsi,%r8), %r10 adcq 8+0(%rsi,%r8), %r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 2: addq $16,%r8 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,4 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,12 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 
102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,12 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,4 cmpq %rcx,%r8 jb 1b cmpq $160,%r8 jne 2b paddd .chacha20_consts(%rip),%xmm1 paddd 48(%rbp),%xmm5 paddd 64(%rbp),%xmm9 paddd 112(%rbp),%xmm13 paddd .chacha20_consts(%rip),%xmm0 paddd 48(%rbp),%xmm4 paddd 64(%rbp),%xmm8 paddd 96(%rbp),%xmm12 movdqu 0 + 0(%rsi),%xmm3 movdqu 16 + 0(%rsi),%xmm7 movdqu 32 + 0(%rsi),%xmm11 movdqu 48 + 0(%rsi),%xmm15 pxor %xmm3,%xmm1 pxor %xmm7,%xmm5 pxor %xmm11,%xmm9 pxor %xmm13,%xmm15 movdqu %xmm1,0 + 0(%rdi) movdqu %xmm5,16 + 0(%rdi) movdqu %xmm9,32 + 0(%rdi) movdqu %xmm15,48 + 0(%rdi) subq $64,%rbx leaq 64(%rsi),%rsi leaq 64(%rdi),%rdi jmp open_sse_tail_64_dec_loop 3: cmpq $192,%rbx ja 3f movdqa .chacha20_consts(%rip),%xmm0 movdqa 48(%rbp),%xmm4 movdqa 64(%rbp),%xmm8 movdqa %xmm0,%xmm1 movdqa %xmm4,%xmm5 movdqa %xmm8,%xmm9 movdqa %xmm0,%xmm2 movdqa %xmm4,%xmm6 movdqa %xmm8,%xmm10 movdqa 96(%rbp),%xmm14 paddd .sse_inc(%rip),%xmm14 movdqa %xmm14,%xmm13 paddd .sse_inc(%rip),%xmm13 movdqa %xmm13,%xmm12 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,96(%rbp) movdqa %xmm13,112(%rbp) movdqa %xmm14,128(%rbp) movq %rbx,%rcx movq $160,%r8 cmpq $160,%rcx cmovgq %r8,%rcx andq $-16,%rcx xorq %r8,%r8 1: addq 0(%rsi,%r8), %r10 adcq 8+0(%rsi,%r8), %r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 2: addq $16,%r8 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,4 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,12 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol16(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $12,%xmm3 psrld $20,%xmm6 pxor %xmm3,%xmm6 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol8(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $7,%xmm3 psrld $25,%xmm6 pxor %xmm3,%xmm6 .byte 102,15,58,15,246,4 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,12 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb 
.rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,12 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,4 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol16(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $12,%xmm3 psrld $20,%xmm6 pxor %xmm3,%xmm6 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol8(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $7,%xmm3 psrld $25,%xmm6 pxor %xmm3,%xmm6 .byte 102,15,58,15,246,12 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,4 cmpq %rcx,%r8 jb 1b cmpq $160,%r8 jne 2b cmpq $176,%rbx jb 1f addq 160(%rsi),%r10 adcq 8+160(%rsi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 cmpq $192,%rbx jb 1f addq 176(%rsi),%r10 adcq 8+176(%rsi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 1: paddd .chacha20_consts(%rip),%xmm2 paddd 48(%rbp),%xmm6 paddd 64(%rbp),%xmm10 paddd 128(%rbp),%xmm14 paddd .chacha20_consts(%rip),%xmm1 paddd 48(%rbp),%xmm5 paddd 64(%rbp),%xmm9 paddd 112(%rbp),%xmm13 paddd .chacha20_consts(%rip),%xmm0 paddd 48(%rbp),%xmm4 paddd 64(%rbp),%xmm8 paddd 96(%rbp),%xmm12 movdqu 0 + 0(%rsi),%xmm3 movdqu 16 + 0(%rsi),%xmm7 movdqu 32 + 0(%rsi),%xmm11 movdqu 48 + 0(%rsi),%xmm15 pxor %xmm3,%xmm2 pxor %xmm7,%xmm6 pxor %xmm11,%xmm10 pxor %xmm14,%xmm15 movdqu %xmm2,0 + 0(%rdi) movdqu %xmm6,16 + 0(%rdi) movdqu %xmm10,32 + 0(%rdi) movdqu %xmm15,48 + 0(%rdi) movdqu 0 + 64(%rsi),%xmm3 movdqu 16 + 64(%rsi),%xmm7 movdqu 32 + 64(%rsi),%xmm11 movdqu 48 + 64(%rsi),%xmm15 pxor %xmm3,%xmm1 pxor %xmm7,%xmm5 pxor %xmm11,%xmm9 pxor %xmm13,%xmm15 movdqu %xmm1,0 + 64(%rdi) movdqu %xmm5,16 + 64(%rdi) movdqu %xmm9,32 + 64(%rdi) movdqu %xmm15,48 + 64(%rdi) subq $128,%rbx leaq 128(%rsi),%rsi leaq 128(%rdi),%rdi jmp open_sse_tail_64_dec_loop 3: movdqa .chacha20_consts(%rip),%xmm0 movdqa 48(%rbp),%xmm4 movdqa 64(%rbp),%xmm8 movdqa 
%xmm0,%xmm1 movdqa %xmm4,%xmm5 movdqa %xmm8,%xmm9 movdqa %xmm0,%xmm2 movdqa %xmm4,%xmm6 movdqa %xmm8,%xmm10 movdqa %xmm0,%xmm3 movdqa %xmm4,%xmm7 movdqa %xmm8,%xmm11 movdqa 96(%rbp),%xmm15 paddd .sse_inc(%rip),%xmm15 movdqa %xmm15,%xmm14 paddd .sse_inc(%rip),%xmm14 movdqa %xmm14,%xmm13 paddd .sse_inc(%rip),%xmm13 movdqa %xmm13,%xmm12 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,96(%rbp) movdqa %xmm13,112(%rbp) movdqa %xmm14,128(%rbp) movdqa %xmm15,144(%rbp) xorq %r8,%r8 1: addq 0(%rsi,%r8), %r10 adcq 8+0(%rsi,%r8), %r11 adcq $1,%r12 movdqa %xmm11,80(%rbp) paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm11 pslld $12,%xmm11 psrld $20,%xmm4 pxor %xmm11,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm11 pslld $7,%xmm11 psrld $25,%xmm4 pxor %xmm11,%xmm4 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm11 pslld $12,%xmm11 psrld $20,%xmm5 pxor %xmm11,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm11 pslld $7,%xmm11 psrld $25,%xmm5 pxor %xmm11,%xmm5 .byte 102,15,58,15,237,4 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,12 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol16(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm11 pslld $12,%xmm11 psrld $20,%xmm6 pxor %xmm11,%xmm6 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol8(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm11 pslld $7,%xmm11 psrld $25,%xmm6 pxor %xmm11,%xmm6 .byte 102,15,58,15,246,4 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,12 movdqa 80(%rbp),%xmm11 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movdqa %xmm9,80(%rbp) paddd %xmm7,%xmm3 pxor %xmm3,%xmm15 pshufb .rol16(%rip),%xmm15 paddd %xmm15,%xmm11 pxor %xmm11,%xmm7 movdqa %xmm7,%xmm9 pslld $12,%xmm9 psrld $20,%xmm7 pxor %xmm9,%xmm7 paddd %xmm7,%xmm3 pxor %xmm3,%xmm15 pshufb .rol8(%rip),%xmm15 paddd %xmm15,%xmm11 pxor %xmm11,%xmm7 movdqa %xmm7,%xmm9 pslld $7,%xmm9 psrld $25,%xmm7 pxor %xmm9,%xmm7 .byte 102,15,58,15,255,4 .byte 102,69,15,58,15,219,8 .byte 102,69,15,58,15,255,12 movdqa 80(%rbp),%xmm9 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx movdqa %xmm11,80(%rbp) paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm11 pslld $12,%xmm11 psrld $20,%xmm4 pxor %xmm11,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm11 pslld $7,%xmm11 psrld $25,%xmm4 pxor %xmm11,%xmm4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm11 pslld $12,%xmm11 psrld $20,%xmm5 pxor %xmm11,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm11 pslld $7,%xmm11 psrld $25,%xmm5 pxor %xmm11,%xmm5 .byte 102,15,58,15,237,12 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,4 imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb 
.rol16(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm11 pslld $12,%xmm11 psrld $20,%xmm6 pxor %xmm11,%xmm6 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol8(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm11 pslld $7,%xmm11 psrld $25,%xmm6 pxor %xmm11,%xmm6 .byte 102,15,58,15,246,12 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,4 movdqa 80(%rbp),%xmm11 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 movdqa %xmm9,80(%rbp) paddd %xmm7,%xmm3 pxor %xmm3,%xmm15 pshufb .rol16(%rip),%xmm15 paddd %xmm15,%xmm11 pxor %xmm11,%xmm7 movdqa %xmm7,%xmm9 pslld $12,%xmm9 psrld $20,%xmm7 pxor %xmm9,%xmm7 paddd %xmm7,%xmm3 pxor %xmm3,%xmm15 pshufb .rol8(%rip),%xmm15 paddd %xmm15,%xmm11 pxor %xmm11,%xmm7 movdqa %xmm7,%xmm9 pslld $7,%xmm9 psrld $25,%xmm7 pxor %xmm9,%xmm7 .byte 102,15,58,15,255,12 .byte 102,69,15,58,15,219,8 .byte 102,69,15,58,15,255,4 movdqa 80(%rbp),%xmm9 addq $16,%r8 cmpq $160,%r8 jb 1b movq %rbx,%rcx andq $-16,%rcx 1: addq 0(%rsi,%r8), %r10 adcq 8+0(%rsi,%r8), %r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 addq $16,%r8 cmpq %rcx,%r8 jb 1b paddd .chacha20_consts(%rip),%xmm3 paddd 48(%rbp),%xmm7 paddd 64(%rbp),%xmm11 paddd 144(%rbp),%xmm15 paddd .chacha20_consts(%rip),%xmm2 paddd 48(%rbp),%xmm6 paddd 64(%rbp),%xmm10 paddd 128(%rbp),%xmm14 paddd .chacha20_consts(%rip),%xmm1 paddd 48(%rbp),%xmm5 paddd 64(%rbp),%xmm9 paddd 112(%rbp),%xmm13 paddd .chacha20_consts(%rip),%xmm0 paddd 48(%rbp),%xmm4 paddd 64(%rbp),%xmm8 paddd 96(%rbp),%xmm12 movdqa %xmm12,80(%rbp) movdqu 0 + 0(%rsi),%xmm12 pxor %xmm3,%xmm12 movdqu %xmm12,0 + 0(%rdi) movdqu 16 + 0(%rsi),%xmm12 pxor %xmm7,%xmm12 movdqu %xmm12,16 + 0(%rdi) movdqu 32 + 0(%rsi),%xmm12 pxor %xmm11,%xmm12 movdqu %xmm12,32 + 0(%rdi) movdqu 48 + 0(%rsi),%xmm12 pxor %xmm15,%xmm12 movdqu %xmm12,48 + 0(%rdi) movdqu 0 + 64(%rsi),%xmm3 movdqu 16 + 64(%rsi),%xmm7 movdqu 32 + 64(%rsi),%xmm11 movdqu 48 + 64(%rsi),%xmm15 pxor %xmm3,%xmm2 pxor %xmm7,%xmm6 pxor %xmm11,%xmm10 pxor %xmm14,%xmm15 movdqu %xmm2,0 + 64(%rdi) movdqu %xmm6,16 + 64(%rdi) movdqu %xmm10,32 + 64(%rdi) movdqu %xmm15,48 + 64(%rdi) movdqu 0 + 128(%rsi),%xmm3 movdqu 16 + 128(%rsi),%xmm7 movdqu 32 + 128(%rsi),%xmm11 movdqu 48 + 128(%rsi),%xmm15 pxor %xmm3,%xmm1 pxor %xmm7,%xmm5 pxor %xmm11,%xmm9 pxor %xmm13,%xmm15 movdqu %xmm1,0 + 128(%rdi) movdqu %xmm5,16 + 128(%rdi) movdqu %xmm9,32 + 128(%rdi) movdqu %xmm15,48 + 128(%rdi) movdqa 80(%rbp),%xmm12 subq $192,%rbx leaq 192(%rsi),%rsi leaq 192(%rdi),%rdi open_sse_tail_64_dec_loop: cmpq $16,%rbx jb 1f subq $16,%rbx movdqu (%rsi),%xmm3 pxor %xmm3,%xmm0 movdqu %xmm0,(%rdi) leaq 16(%rsi),%rsi leaq 16(%rdi),%rdi movdqa %xmm4,%xmm0 movdqa %xmm8,%xmm4 movdqa %xmm12,%xmm8 jmp open_sse_tail_64_dec_loop 1: movdqa %xmm0,%xmm1 open_sse_tail_16: testq %rbx,%rbx jz open_sse_finalize pxor %xmm3,%xmm3 leaq -1(%rsi,%rbx), %rsi movq %rbx,%r8 2: 
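// Gather the final 1..15 ciphertext bytes into %xmm3 one byte at a time,
// walking backwards from the end of the input.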
pslldq $1,%xmm3 pinsrb $0,(%rsi),%xmm3 subq $1,%rsi subq $1,%r8 jnz 2b 3: .byte 102,73,15,126,221 pextrq $1,%xmm3,%r14 pxor %xmm1,%xmm3 2: pextrb $0,%xmm3,(%rdi) psrldq $1,%xmm3 addq $1,%rdi subq $1,%rbx jne 2b addq %r13,%r10 adcq %r14,%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 open_sse_finalize: addq 32(%rbp),%r10 adcq 8+32(%rbp),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 movq %r10,%r13 movq %r11,%r14 movq %r12,%r15 subq $-5,%r10 sbbq $-1,%r11 sbbq $3,%r12 cmovcq %r13,%r10 cmovcq %r14,%r11 cmovcq %r15,%r12 addq 0+16(%rbp),%r10 adcq 8+16(%rbp),%r11 addq $288 + 32,%rsp .cfi_adjust_cfa_offset -(288 + 32) popq %r9 .cfi_adjust_cfa_offset -8 movq %r10,(%r9) movq %r11,8(%r9) popq %r15 .cfi_adjust_cfa_offset -8 popq %r14 .cfi_adjust_cfa_offset -8 popq %r13 .cfi_adjust_cfa_offset -8 popq %r12 .cfi_adjust_cfa_offset -8 popq %rbx .cfi_adjust_cfa_offset -8 popq %rbp .cfi_adjust_cfa_offset -8 .byte 0xf3,0xc3 .cfi_adjust_cfa_offset (8 * 6) + 288 + 32 open_sse_128: movdqu .chacha20_consts(%rip),%xmm0 movdqa %xmm0,%xmm1 movdqa %xmm0,%xmm2 movdqu 0(%r9),%xmm4 movdqa %xmm4,%xmm5 movdqa %xmm4,%xmm6 movdqu 16(%r9),%xmm8 movdqa %xmm8,%xmm9 movdqa %xmm8,%xmm10 movdqu 32(%r9),%xmm12 movdqa %xmm12,%xmm13 paddd .sse_inc(%rip),%xmm13 movdqa %xmm13,%xmm14 paddd .sse_inc(%rip),%xmm14 movdqa %xmm4,%xmm7 movdqa %xmm8,%xmm11 movdqa %xmm13,%xmm15 movq $10,%r10 1: paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,4 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,12 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol16(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $12,%xmm3 psrld $20,%xmm6 pxor %xmm3,%xmm6 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol8(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $7,%xmm3 psrld 
$25,%xmm6 pxor %xmm3,%xmm6 .byte 102,15,58,15,246,4 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,12 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,12 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,4 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol16(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $12,%xmm3 psrld $20,%xmm6 pxor %xmm3,%xmm6 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol8(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $7,%xmm3 psrld $25,%xmm6 pxor %xmm3,%xmm6 .byte 102,15,58,15,246,12 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,4 decq %r10 jnz 1b paddd .chacha20_consts(%rip),%xmm0 paddd .chacha20_consts(%rip),%xmm1 paddd .chacha20_consts(%rip),%xmm2 paddd %xmm7,%xmm4 paddd %xmm7,%xmm5 paddd %xmm7,%xmm6 paddd %xmm11,%xmm9 paddd %xmm11,%xmm10 paddd %xmm15,%xmm13 paddd .sse_inc(%rip),%xmm15 paddd %xmm15,%xmm14 pand .clamp(%rip),%xmm0 movdqa %xmm0,0(%rbp) movdqa %xmm4,16(%rbp) movq %r8,%r8 call poly_hash_ad_internal 1: cmpq $16,%rbx jb open_sse_tail_16 subq $16,%rbx addq 0(%rsi),%r10 adcq 8+0(%rsi),%r11 adcq $1,%r12 movdqu 0(%rsi),%xmm3 pxor %xmm3,%xmm1 movdqu %xmm1,0(%rdi) leaq 16(%rsi),%rsi leaq 16(%rdi),%rdi movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 movdqa %xmm5,%xmm1 movdqa %xmm9,%xmm5 movdqa %xmm13,%xmm9 movdqa %xmm2,%xmm13 movdqa %xmm6,%xmm2 movdqa %xmm10,%xmm6 movdqa %xmm14,%xmm10 jmp 1b jmp open_sse_tail_16 .size chacha20_poly1305_open, .-chacha20_poly1305_open .cfi_endproc .globl chacha20_poly1305_seal .hidden chacha20_poly1305_seal .type chacha20_poly1305_seal,@function .align 64 chacha20_poly1305_seal: .cfi_startproc pushq %rbp .cfi_adjust_cfa_offset 8 pushq %rbx .cfi_adjust_cfa_offset 8 pushq %r12 .cfi_adjust_cfa_offset 8 pushq %r13 .cfi_adjust_cfa_offset 8 pushq %r14 .cfi_adjust_cfa_offset 8 pushq %r15 .cfi_adjust_cfa_offset 8 pushq %r9 .cfi_adjust_cfa_offset 8 subq $288 + 32,%rsp .cfi_adjust_cfa_offset 288 + 32 .cfi_offset rbp, -16 .cfi_offset rbx, -24 .cfi_offset r12, -32 .cfi_offset r13, -40 .cfi_offset r14, -48 .cfi_offset r15, -56 .cfi_offset %r9, -64 leaq 32(%rsp),%rbp andq $-32,%rbp movq %rdx,8+32(%rbp) movq %r8,0+32(%rbp) movq %rdx,%rbx movl OPENSSL_ia32cap_P+8(%rip),%eax andl $288,%eax xorl $288,%eax jz chacha20_poly1305_seal_avx2 cmpq $128,%rbx jbe seal_sse_128 
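// seal, SSE bulk path: run four ChaCha20 blocks up front; the lowest-counter
// block is clamped to form the Poly1305 key, and the remaining keystream
// encrypts the first plaintext bytes.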
movdqa .chacha20_consts(%rip),%xmm0 movdqu 0(%r9),%xmm4 movdqu 16(%r9),%xmm8 movdqu 32(%r9),%xmm12 movdqa %xmm0,%xmm1 movdqa %xmm0,%xmm2 movdqa %xmm0,%xmm3 movdqa %xmm4,%xmm5 movdqa %xmm4,%xmm6 movdqa %xmm4,%xmm7 movdqa %xmm8,%xmm9 movdqa %xmm8,%xmm10 movdqa %xmm8,%xmm11 movdqa %xmm12,%xmm15 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,%xmm14 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,%xmm13 paddd .sse_inc(%rip),%xmm12 movdqa %xmm4,48(%rbp) movdqa %xmm8,64(%rbp) movdqa %xmm12,96(%rbp) movdqa %xmm13,112(%rbp) movdqa %xmm14,128(%rbp) movdqa %xmm15,144(%rbp) movq $10,%r10 1: movdqa %xmm8,80(%rbp) movdqa .rol16(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd %xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd %xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 pxor %xmm10,%xmm6 pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm4 pxor %xmm8,%xmm4 movdqa .rol8(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd %xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd %xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 pxor %xmm10,%xmm6 pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm4 pxor %xmm8,%xmm4 movdqa 80(%rbp),%xmm8 .byte 102,15,58,15,255,4 .byte 102,69,15,58,15,219,8 .byte 102,69,15,58,15,255,12 .byte 102,15,58,15,246,4 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,12 .byte 102,15,58,15,237,4 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,12 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 movdqa %xmm8,80(%rbp) movdqa .rol16(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd %xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd %xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 pxor %xmm10,%xmm6 pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm4 pxor %xmm8,%xmm4 movdqa .rol8(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd %xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd 
%xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 pxor %xmm10,%xmm6 pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm4 pxor %xmm8,%xmm4 movdqa 80(%rbp),%xmm8 .byte 102,15,58,15,255,12 .byte 102,69,15,58,15,219,8 .byte 102,69,15,58,15,255,4 .byte 102,15,58,15,246,12 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,4 .byte 102,15,58,15,237,12 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 decq %r10 jnz 1b paddd .chacha20_consts(%rip),%xmm3 paddd 48(%rbp),%xmm7 paddd 64(%rbp),%xmm11 paddd 144(%rbp),%xmm15 paddd .chacha20_consts(%rip),%xmm2 paddd 48(%rbp),%xmm6 paddd 64(%rbp),%xmm10 paddd 128(%rbp),%xmm14 paddd .chacha20_consts(%rip),%xmm1 paddd 48(%rbp),%xmm5 paddd 64(%rbp),%xmm9 paddd 112(%rbp),%xmm13 paddd .chacha20_consts(%rip),%xmm0 paddd 48(%rbp),%xmm4 paddd 64(%rbp),%xmm8 paddd 96(%rbp),%xmm12 pand .clamp(%rip),%xmm3 movdqa %xmm3,0(%rbp) movdqa %xmm7,16(%rbp) movq %r8,%r8 call poly_hash_ad_internal movdqu 0 + 0(%rsi),%xmm3 movdqu 16 + 0(%rsi),%xmm7 movdqu 32 + 0(%rsi),%xmm11 movdqu 48 + 0(%rsi),%xmm15 pxor %xmm3,%xmm2 pxor %xmm7,%xmm6 pxor %xmm11,%xmm10 pxor %xmm14,%xmm15 movdqu %xmm2,0 + 0(%rdi) movdqu %xmm6,16 + 0(%rdi) movdqu %xmm10,32 + 0(%rdi) movdqu %xmm15,48 + 0(%rdi) movdqu 0 + 64(%rsi),%xmm3 movdqu 16 + 64(%rsi),%xmm7 movdqu 32 + 64(%rsi),%xmm11 movdqu 48 + 64(%rsi),%xmm15 pxor %xmm3,%xmm1 pxor %xmm7,%xmm5 pxor %xmm11,%xmm9 pxor %xmm13,%xmm15 movdqu %xmm1,0 + 64(%rdi) movdqu %xmm5,16 + 64(%rdi) movdqu %xmm9,32 + 64(%rdi) movdqu %xmm15,48 + 64(%rdi) cmpq $192,%rbx ja 1f movq $128,%rcx subq $128,%rbx leaq 128(%rsi),%rsi jmp seal_sse_128_seal_hash 1: movdqu 0 + 128(%rsi),%xmm3 movdqu 16 + 128(%rsi),%xmm7 movdqu 32 + 128(%rsi),%xmm11 movdqu 48 + 128(%rsi),%xmm15 pxor %xmm3,%xmm0 pxor %xmm7,%xmm4 pxor %xmm11,%xmm8 pxor %xmm12,%xmm15 movdqu %xmm0,0 + 128(%rdi) movdqu %xmm4,16 + 128(%rdi) movdqu %xmm8,32 + 128(%rdi) movdqu %xmm15,48 + 128(%rdi) movq $192,%rcx subq $192,%rbx leaq 192(%rsi),%rsi movq $2,%rcx movq $8,%r8 cmpq $64,%rbx jbe seal_sse_tail_64 cmpq $128,%rbx jbe seal_sse_tail_128 cmpq $192,%rbx jbe seal_sse_tail_192 1: movdqa .chacha20_consts(%rip),%xmm0 movdqa 48(%rbp),%xmm4 movdqa 64(%rbp),%xmm8 movdqa %xmm0,%xmm1 movdqa %xmm4,%xmm5 movdqa %xmm8,%xmm9 movdqa %xmm0,%xmm2 movdqa %xmm4,%xmm6 movdqa %xmm8,%xmm10 movdqa %xmm0,%xmm3 movdqa %xmm4,%xmm7 movdqa %xmm8,%xmm11 movdqa 96(%rbp),%xmm15 paddd .sse_inc(%rip),%xmm15 movdqa %xmm15,%xmm14 paddd .sse_inc(%rip),%xmm14 movdqa %xmm14,%xmm13 paddd .sse_inc(%rip),%xmm13 movdqa %xmm13,%xmm12 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,96(%rbp) movdqa %xmm13,112(%rbp) movdqa %xmm14,128(%rbp) movdqa %xmm15,144(%rbp) 2: movdqa %xmm8,80(%rbp) movdqa .rol16(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd %xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd %xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 pxor %xmm10,%xmm6 pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld 
$20,%xmm8 pslld $32-20,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm4 pxor %xmm8,%xmm4 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movdqa .rol8(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd %xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd %xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 pxor %xmm10,%xmm6 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm4 pxor %xmm8,%xmm4 movdqa 80(%rbp),%xmm8 imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 .byte 102,15,58,15,255,4 .byte 102,69,15,58,15,219,8 .byte 102,69,15,58,15,255,12 .byte 102,15,58,15,246,4 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,12 .byte 102,15,58,15,237,4 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,12 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 movdqa %xmm8,80(%rbp) movdqa .rol16(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd %xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd %xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 pxor %xmm10,%xmm6 pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $20,%xmm8 pslld $32-20,%xmm4 pxor %xmm8,%xmm4 movdqa .rol8(%rip),%xmm8 paddd %xmm7,%xmm3 paddd %xmm6,%xmm2 paddd %xmm5,%xmm1 paddd %xmm4,%xmm0 pxor %xmm3,%xmm15 pxor %xmm2,%xmm14 pxor %xmm1,%xmm13 pxor %xmm0,%xmm12 .byte 102,69,15,56,0,248 .byte 102,69,15,56,0,240 .byte 102,69,15,56,0,232 .byte 102,69,15,56,0,224 movdqa 80(%rbp),%xmm8 paddd %xmm15,%xmm11 paddd %xmm14,%xmm10 paddd %xmm13,%xmm9 paddd %xmm12,%xmm8 pxor %xmm11,%xmm7 pxor %xmm10,%xmm6 pxor %xmm9,%xmm5 pxor %xmm8,%xmm4 movdqa %xmm8,80(%rbp) movdqa %xmm7,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm7 pxor %xmm8,%xmm7 movdqa %xmm6,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm6 pxor %xmm8,%xmm6 movdqa %xmm5,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm5 pxor %xmm8,%xmm5 movdqa %xmm4,%xmm8 psrld $25,%xmm8 pslld $32-25,%xmm4 pxor %xmm8,%xmm4 movdqa 80(%rbp),%xmm8 .byte 102,15,58,15,255,12 .byte 102,69,15,58,15,219,8 .byte 102,69,15,58,15,255,4 .byte 102,15,58,15,246,12 
.byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,4 .byte 102,15,58,15,237,12 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 leaq 16(%rdi),%rdi decq %r8 jge 2b addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rdi),%rdi decq %rcx jg 2b paddd .chacha20_consts(%rip),%xmm3 paddd 48(%rbp),%xmm7 paddd 64(%rbp),%xmm11 paddd 144(%rbp),%xmm15 paddd .chacha20_consts(%rip),%xmm2 paddd 48(%rbp),%xmm6 paddd 64(%rbp),%xmm10 paddd 128(%rbp),%xmm14 paddd .chacha20_consts(%rip),%xmm1 paddd 48(%rbp),%xmm5 paddd 64(%rbp),%xmm9 paddd 112(%rbp),%xmm13 paddd .chacha20_consts(%rip),%xmm0 paddd 48(%rbp),%xmm4 paddd 64(%rbp),%xmm8 paddd 96(%rbp),%xmm12 movdqa %xmm14,80(%rbp) movdqa %xmm14,80(%rbp) movdqu 0 + 0(%rsi),%xmm14 pxor %xmm3,%xmm14 movdqu %xmm14,0 + 0(%rdi) movdqu 16 + 0(%rsi),%xmm14 pxor %xmm7,%xmm14 movdqu %xmm14,16 + 0(%rdi) movdqu 32 + 0(%rsi),%xmm14 pxor %xmm11,%xmm14 movdqu %xmm14,32 + 0(%rdi) movdqu 48 + 0(%rsi),%xmm14 pxor %xmm15,%xmm14 movdqu %xmm14,48 + 0(%rdi) movdqa 80(%rbp),%xmm14 movdqu 0 + 64(%rsi),%xmm3 movdqu 16 + 64(%rsi),%xmm7 movdqu 32 + 64(%rsi),%xmm11 movdqu 48 + 64(%rsi),%xmm15 pxor %xmm3,%xmm2 pxor %xmm7,%xmm6 pxor %xmm11,%xmm10 pxor %xmm14,%xmm15 movdqu %xmm2,0 + 64(%rdi) movdqu %xmm6,16 + 64(%rdi) movdqu %xmm10,32 + 64(%rdi) movdqu %xmm15,48 + 64(%rdi) movdqu 0 + 128(%rsi),%xmm3 movdqu 16 + 128(%rsi),%xmm7 movdqu 32 + 128(%rsi),%xmm11 movdqu 48 + 128(%rsi),%xmm15 pxor %xmm3,%xmm1 pxor %xmm7,%xmm5 pxor %xmm11,%xmm9 pxor %xmm13,%xmm15 movdqu %xmm1,0 + 128(%rdi) movdqu %xmm5,16 + 128(%rdi) movdqu %xmm9,32 + 128(%rdi) movdqu %xmm15,48 + 128(%rdi) cmpq $256,%rbx ja 3f movq $192,%rcx subq $192,%rbx leaq 192(%rsi),%rsi jmp seal_sse_128_seal_hash 3: movdqu 0 + 192(%rsi),%xmm3 movdqu 16 + 192(%rsi),%xmm7 movdqu 32 + 192(%rsi),%xmm11 movdqu 48 + 192(%rsi),%xmm15 pxor %xmm3,%xmm0 pxor %xmm7,%xmm4 pxor %xmm11,%xmm8 pxor %xmm12,%xmm15 movdqu %xmm0,0 + 192(%rdi) movdqu %xmm4,16 + 192(%rdi) movdqu %xmm8,32 + 192(%rdi) movdqu %xmm15,48 + 192(%rdi) leaq 256(%rsi),%rsi subq $256,%rbx movq $6,%rcx movq $4,%r8 cmpq $192,%rbx jg 1b movq %rbx,%rcx testq %rbx,%rbx je seal_sse_128_seal_hash movq $6,%rcx cmpq $64,%rbx jg 3f seal_sse_tail_64: movdqa .chacha20_consts(%rip),%xmm0 movdqa 48(%rbp),%xmm4 movdqa 64(%rbp),%xmm8 movdqa 96(%rbp),%xmm12 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,96(%rbp) 1: addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq 
%r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rdi),%rdi 2: paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rdi),%rdi decq %rcx jg 1b decq %r8 jge 2b paddd .chacha20_consts(%rip),%xmm0 paddd 48(%rbp),%xmm4 paddd 64(%rbp),%xmm8 paddd 96(%rbp),%xmm12 jmp seal_sse_128_seal 3: cmpq $128,%rbx jg 3f seal_sse_tail_128: movdqa .chacha20_consts(%rip),%xmm0 movdqa 48(%rbp),%xmm4 movdqa 64(%rbp),%xmm8 movdqa %xmm0,%xmm1 movdqa %xmm4,%xmm5 movdqa %xmm8,%xmm9 movdqa 96(%rbp),%xmm13 paddd .sse_inc(%rip),%xmm13 movdqa %xmm13,%xmm12 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,96(%rbp) movdqa %xmm13,112(%rbp) 1: addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rdi),%rdi 2: paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,4 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,12 addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq 
%rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,12 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,4 leaq 16(%rdi),%rdi decq %rcx jg 1b decq %r8 jge 2b paddd .chacha20_consts(%rip),%xmm1 paddd 48(%rbp),%xmm5 paddd 64(%rbp),%xmm9 paddd 112(%rbp),%xmm13 paddd .chacha20_consts(%rip),%xmm0 paddd 48(%rbp),%xmm4 paddd 64(%rbp),%xmm8 paddd 96(%rbp),%xmm12 movdqu 0 + 0(%rsi),%xmm3 movdqu 16 + 0(%rsi),%xmm7 movdqu 32 + 0(%rsi),%xmm11 movdqu 48 + 0(%rsi),%xmm15 pxor %xmm3,%xmm1 pxor %xmm7,%xmm5 pxor %xmm11,%xmm9 pxor %xmm13,%xmm15 movdqu %xmm1,0 + 0(%rdi) movdqu %xmm5,16 + 0(%rdi) movdqu %xmm9,32 + 0(%rdi) movdqu %xmm15,48 + 0(%rdi) movq $64,%rcx subq $64,%rbx leaq 64(%rsi),%rsi jmp seal_sse_128_seal_hash 3: seal_sse_tail_192: movdqa .chacha20_consts(%rip),%xmm0 movdqa 48(%rbp),%xmm4 movdqa 64(%rbp),%xmm8 movdqa %xmm0,%xmm1 movdqa %xmm4,%xmm5 movdqa %xmm8,%xmm9 movdqa %xmm0,%xmm2 movdqa %xmm4,%xmm6 movdqa %xmm8,%xmm10 movdqa 96(%rbp),%xmm14 paddd .sse_inc(%rip),%xmm14 movdqa %xmm14,%xmm13 paddd .sse_inc(%rip),%xmm13 movdqa %xmm13,%xmm12 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,96(%rbp) movdqa %xmm13,112(%rbp) movdqa %xmm14,128(%rbp) 1: addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rdi),%rdi 2: paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 
psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,4 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,12 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol16(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $12,%xmm3 psrld $20,%xmm6 pxor %xmm3,%xmm6 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol8(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $7,%xmm3 psrld $25,%xmm6 pxor %xmm3,%xmm6 .byte 102,15,58,15,246,4 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,12 addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,12 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,4 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol16(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $12,%xmm3 psrld $20,%xmm6 pxor %xmm3,%xmm6 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol8(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $7,%xmm3 psrld $25,%xmm6 pxor %xmm3,%xmm6 .byte 102,15,58,15,246,12 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,4 leaq 16(%rdi),%rdi decq %rcx jg 1b decq %r8 jge 2b paddd .chacha20_consts(%rip),%xmm2 paddd 48(%rbp),%xmm6 paddd 64(%rbp),%xmm10 paddd 128(%rbp),%xmm14 paddd .chacha20_consts(%rip),%xmm1 paddd 48(%rbp),%xmm5 paddd 64(%rbp),%xmm9 paddd 112(%rbp),%xmm13 paddd .chacha20_consts(%rip),%xmm0 paddd 48(%rbp),%xmm4 paddd 64(%rbp),%xmm8 paddd 96(%rbp),%xmm12 movdqu 0 + 0(%rsi),%xmm3 movdqu 16 + 0(%rsi),%xmm7 movdqu 32 + 0(%rsi),%xmm11 movdqu 48 + 0(%rsi),%xmm15 pxor %xmm3,%xmm2 pxor %xmm7,%xmm6 pxor %xmm11,%xmm10 pxor %xmm14,%xmm15 movdqu %xmm2,0 + 0(%rdi) movdqu %xmm6,16 + 0(%rdi) movdqu %xmm10,32 + 0(%rdi) movdqu %xmm15,48 + 0(%rdi) movdqu 0 + 64(%rsi),%xmm3 movdqu 16 + 64(%rsi),%xmm7 movdqu 32 + 64(%rsi),%xmm11 movdqu 48 + 64(%rsi),%xmm15 pxor %xmm3,%xmm1 pxor %xmm7,%xmm5 pxor %xmm11,%xmm9 pxor %xmm13,%xmm15 movdqu %xmm1,0 + 64(%rdi) movdqu %xmm5,16 + 64(%rdi) movdqu %xmm9,32 + 64(%rdi) movdqu %xmm15,48 + 64(%rdi) movq $128,%rcx subq $128,%rbx leaq 128(%rsi),%rsi seal_sse_128_seal_hash: cmpq 
$16,%rcx jb seal_sse_128_seal addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 subq $16,%rcx leaq 16(%rdi),%rdi jmp seal_sse_128_seal_hash seal_sse_128_seal: cmpq $16,%rbx jb seal_sse_tail_16 subq $16,%rbx movdqu 0(%rsi),%xmm3 pxor %xmm3,%xmm0 movdqu %xmm0,0(%rdi) addq 0(%rdi),%r10 adcq 8(%rdi),%r11 adcq $1,%r12 leaq 16(%rsi),%rsi leaq 16(%rdi),%rdi movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 movdqa %xmm4,%xmm0 movdqa %xmm8,%xmm4 movdqa %xmm12,%xmm8 movdqa %xmm1,%xmm12 movdqa %xmm5,%xmm1 movdqa %xmm9,%xmm5 movdqa %xmm13,%xmm9 jmp seal_sse_128_seal seal_sse_tail_16: testq %rbx,%rbx jz seal_sse_finalize movq %rbx,%r8 shlq $4,%r8 leaq .and_masks(%rip),%r13 movq %rbx,%rcx leaq -1(%rsi,%rbx), %rsi pxor %xmm15,%xmm15 1: pslldq $1,%xmm15 pinsrb $0,(%rsi),%xmm15 leaq -1(%rsi),%rsi decq %rcx jne 1b pxor %xmm0,%xmm15 movq %rbx,%rcx movdqu %xmm15,%xmm0 2: pextrb $0,%xmm0,(%rdi) psrldq $1,%xmm0 addq $1,%rdi subq $1,%rcx jnz 2b pand -16(%r13,%r8), %xmm15 .byte 102,77,15,126,253 pextrq $1,%xmm15,%r14 addq %r13,%r10 adcq %r14,%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 seal_sse_finalize: addq 32(%rbp),%r10 adcq 8+32(%rbp),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 movq %r10,%r13 movq %r11,%r14 movq %r12,%r15 subq $-5,%r10 sbbq $-1,%r11 sbbq $3,%r12 cmovcq %r13,%r10 cmovcq %r14,%r11 cmovcq %r15,%r12 addq 0+16(%rbp),%r10 adcq 8+16(%rbp),%r11 addq $288 + 32,%rsp .cfi_adjust_cfa_offset -(288 + 32) popq %r9 
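/* The popq above restores the pointer saved on entry into %r9; the two movq stores below write the final 16-byte Poly1305 tag (r10:r11) through it. */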
.cfi_adjust_cfa_offset -8 movq %r10,0(%r9) movq %r11,8(%r9) popq %r15 .cfi_adjust_cfa_offset -8 popq %r14 .cfi_adjust_cfa_offset -8 popq %r13 .cfi_adjust_cfa_offset -8 popq %r12 .cfi_adjust_cfa_offset -8 popq %rbx .cfi_adjust_cfa_offset -8 popq %rbp .cfi_adjust_cfa_offset -8 .byte 0xf3,0xc3 .cfi_adjust_cfa_offset (8 * 6) + 288 + 32 seal_sse_128: movdqu .chacha20_consts(%rip),%xmm0 movdqa %xmm0,%xmm1 movdqa %xmm0,%xmm2 movdqu 0(%r9),%xmm4 movdqa %xmm4,%xmm5 movdqa %xmm4,%xmm6 movdqu 16(%r9),%xmm8 movdqa %xmm8,%xmm9 movdqa %xmm8,%xmm10 movdqu 32(%r9),%xmm14 movdqa %xmm14,%xmm12 paddd .sse_inc(%rip),%xmm12 movdqa %xmm12,%xmm13 paddd .sse_inc(%rip),%xmm13 movdqa %xmm4,%xmm7 movdqa %xmm8,%xmm11 movdqa %xmm12,%xmm15 movq $10,%r10 1: paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,4 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,12 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,4 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,12 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol16(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $12,%xmm3 psrld $20,%xmm6 pxor %xmm3,%xmm6 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol8(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $7,%xmm3 psrld $25,%xmm6 pxor %xmm3,%xmm6 .byte 102,15,58,15,246,4 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,12 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol16(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $12,%xmm3 psrld $20,%xmm4 pxor %xmm3,%xmm4 paddd %xmm4,%xmm0 pxor %xmm0,%xmm12 pshufb .rol8(%rip),%xmm12 paddd %xmm12,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm3 pslld $7,%xmm3 psrld $25,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,15,228,12 .byte 102,69,15,58,15,192,8 .byte 102,69,15,58,15,228,4 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol16(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $12,%xmm3 psrld $20,%xmm5 pxor %xmm3,%xmm5 paddd %xmm5,%xmm1 pxor %xmm1,%xmm13 pshufb .rol8(%rip),%xmm13 paddd %xmm13,%xmm9 pxor %xmm9,%xmm5 movdqa %xmm5,%xmm3 pslld $7,%xmm3 psrld $25,%xmm5 pxor %xmm3,%xmm5 .byte 102,15,58,15,237,12 .byte 102,69,15,58,15,201,8 .byte 102,69,15,58,15,237,4 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol16(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $12,%xmm3 psrld $20,%xmm6 pxor %xmm3,%xmm6 paddd %xmm6,%xmm2 pxor %xmm2,%xmm14 pshufb .rol8(%rip),%xmm14 paddd %xmm14,%xmm10 pxor %xmm10,%xmm6 movdqa %xmm6,%xmm3 pslld $7,%xmm3 psrld $25,%xmm6 pxor %xmm3,%xmm6 .byte 102,15,58,15,246,12 .byte 102,69,15,58,15,210,8 .byte 102,69,15,58,15,246,4 decq %r10 jnz 1b paddd .chacha20_consts(%rip),%xmm0 paddd .chacha20_consts(%rip),%xmm1 paddd .chacha20_consts(%rip),%xmm2 paddd %xmm7,%xmm4 paddd %xmm7,%xmm5 paddd %xmm7,%xmm6 paddd %xmm11,%xmm8 paddd %xmm11,%xmm9 paddd %xmm15,%xmm12 paddd .sse_inc(%rip),%xmm15 paddd %xmm15,%xmm13 pand .clamp(%rip),%xmm2 movdqa 
%xmm2,0(%rbp) movdqa %xmm6,16(%rbp) movq %r8,%r8 call poly_hash_ad_internal jmp seal_sse_128_seal .size chacha20_poly1305_seal, .-chacha20_poly1305_seal .type chacha20_poly1305_open_avx2,@function .align 64 chacha20_poly1305_open_avx2: vzeroupper vmovdqa .chacha20_consts(%rip),%ymm0 vbroadcasti128 0(%r9),%ymm4 vbroadcasti128 16(%r9),%ymm8 vbroadcasti128 32(%r9),%ymm12 vpaddd .avx2_init(%rip),%ymm12,%ymm12 cmpq $192,%rbx jbe open_avx2_192 cmpq $320,%rbx jbe open_avx2_320 vmovdqa %ymm4,64(%rbp) vmovdqa %ymm8,96(%rbp) vmovdqa %ymm12,160(%rbp) movq $10,%r10 1: vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $12,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm4,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $4,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm4,%ymm4,%ymm4 decq %r10 jne 1b vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 vpand .clamp(%rip),%ymm3,%ymm3 vmovdqa %ymm3,0(%rbp) vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 movq %r8,%r8 call poly_hash_ad_internal xorq %rcx,%rcx 1: addq 0(%rsi,%rcx), %r10 adcq 8+0(%rsi,%rcx), %r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 addq $16,%rcx cmpq $64,%rcx jne 1b vpxor 0(%rsi),%ymm0,%ymm0 vpxor 32(%rsi),%ymm4,%ymm4 vmovdqu %ymm0,0(%rdi) vmovdqu %ymm4,32(%rdi) leaq 64(%rsi),%rsi leaq 64(%rdi),%rdi subq $64,%rbx 1: cmpq $512,%rbx jb 3f vmovdqa .chacha20_consts(%rip),%ymm0 vmovdqa 64(%rbp),%ymm4 vmovdqa 96(%rbp),%ymm8 vmovdqa %ymm0,%ymm1 vmovdqa %ymm4,%ymm5 vmovdqa %ymm8,%ymm9 vmovdqa %ymm0,%ymm2 vmovdqa %ymm4,%ymm6 vmovdqa %ymm8,%ymm10 vmovdqa %ymm0,%ymm3 vmovdqa %ymm4,%ymm7 vmovdqa %ymm8,%ymm11 vmovdqa .avx2_inc(%rip),%ymm12 vpaddd 160(%rbp),%ymm12,%ymm15 vpaddd %ymm15,%ymm12,%ymm14 vpaddd %ymm14,%ymm12,%ymm13 vpaddd %ymm13,%ymm12,%ymm12 vmovdqa %ymm15,256(%rbp) vmovdqa %ymm14,224(%rbp) vmovdqa %ymm13,192(%rbp) vmovdqa %ymm12,160(%rbp) xorq %rcx,%rcx 2: addq 0*8(%rsi,%rcx), %r10 adcq 8+0*8(%rsi,%rcx), %r11 adcq $1,%r12 vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor 
%ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 addq %rax,%r15 adcq %rdx,%r9 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 addq 2*8(%rsi,%rcx), %r10 adcq 8+2*8(%rsi,%rcx), %r11 adcq $1,%r12 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 vpslld $32-25,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $4,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $12,%ymm15,%ymm15,%ymm15 vpalignr $4,%ymm6,%ymm6,%ymm6 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $12,%ymm14,%ymm14,%ymm14 vpalignr $4,%ymm5,%ymm5,%ymm5 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $4,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm12,%ymm12,%ymm12 vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 addq %rax,%r15 adcq %rdx,%r9 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq 
$0,%r12 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 addq 4*8(%rsi,%rcx), %r10 adcq 8+4*8(%rsi,%rcx), %r11 adcq $1,%r12 leaq 48(%rcx),%rcx vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 vpslld $32-25,%ymm7,%ymm7 addq %rax,%r15 adcq %rdx,%r9 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $12,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $4,%ymm15,%ymm15,%ymm15 vpalignr $12,%ymm6,%ymm6,%ymm6 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $4,%ymm14,%ymm14,%ymm14 vpalignr $12,%ymm5,%ymm5,%ymm5 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $12,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm12,%ymm12,%ymm12 cmpq $60*8,%rcx jne 2b vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 vpaddd 64(%rbp),%ymm7,%ymm7 vpaddd 96(%rbp),%ymm11,%ymm11 vpaddd 256(%rbp),%ymm15,%ymm15 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 vpaddd 64(%rbp),%ymm6,%ymm6 vpaddd 96(%rbp),%ymm10,%ymm10 vpaddd 224(%rbp),%ymm14,%ymm14 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 vpaddd 64(%rbp),%ymm5,%ymm5 vpaddd 96(%rbp),%ymm9,%ymm9 vpaddd 192(%rbp),%ymm13,%ymm13 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 vmovdqa %ymm0,128(%rbp) addq 60*8(%rsi),%r10 adcq 8+60*8(%rsi),%r11 adcq $1,%r12 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 vpxor 0+0(%rsi),%ymm0,%ymm0 vpxor 32+0(%rsi),%ymm3,%ymm3 vpxor 64+0(%rsi),%ymm7,%ymm7 vpxor 96+0(%rsi),%ymm11,%ymm11 vmovdqu %ymm0,0+0(%rdi) vmovdqu %ymm3,32+0(%rdi) vmovdqu %ymm7,64+0(%rdi) vmovdqu %ymm11,96+0(%rdi) vmovdqa 128(%rbp),%ymm0 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax 
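/* 8+0(%rbp) holds the high limb r1 of the clamped Poly1305 key; the mulq/imul below finish the h*r product before the andq/shrdq reduction mod 2^130-5. */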
mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 vpxor 0+128(%rsi),%ymm3,%ymm3 vpxor 32+128(%rsi),%ymm2,%ymm2 vpxor 64+128(%rsi),%ymm6,%ymm6 vpxor 96+128(%rsi),%ymm10,%ymm10 vmovdqu %ymm3,0+128(%rdi) vmovdqu %ymm2,32+128(%rdi) vmovdqu %ymm6,64+128(%rdi) vmovdqu %ymm10,96+128(%rdi) addq 60*8+16(%rsi),%r10 adcq 8+60*8+16(%rsi),%r11 adcq $1,%r12 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 vpxor 0+256(%rsi),%ymm3,%ymm3 vpxor 32+256(%rsi),%ymm1,%ymm1 vpxor 64+256(%rsi),%ymm5,%ymm5 vpxor 96+256(%rsi),%ymm9,%ymm9 vmovdqu %ymm3,0+256(%rdi) vmovdqu %ymm1,32+256(%rdi) vmovdqu %ymm5,64+256(%rdi) vmovdqu %ymm9,96+256(%rdi) movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 vpxor 0+384(%rsi),%ymm3,%ymm3 vpxor 32+384(%rsi),%ymm0,%ymm0 vpxor 64+384(%rsi),%ymm4,%ymm4 vpxor 96+384(%rsi),%ymm8,%ymm8 vmovdqu %ymm3,0+384(%rdi) vmovdqu %ymm0,32+384(%rdi) vmovdqu %ymm4,64+384(%rdi) vmovdqu %ymm8,96+384(%rdi) leaq 512(%rsi),%rsi leaq 512(%rdi),%rdi subq $512,%rbx jmp 1b 3: testq %rbx,%rbx vzeroupper je open_sse_finalize 3: cmpq $128,%rbx ja 3f vmovdqa .chacha20_consts(%rip),%ymm0 vmovdqa 64(%rbp),%ymm4 vmovdqa 96(%rbp),%ymm8 vmovdqa .avx2_inc(%rip),%ymm12 vpaddd 160(%rbp),%ymm12,%ymm12 vmovdqa %ymm12,160(%rbp) xorq %r8,%r8 movq %rbx,%rcx andq $-16,%rcx testq %rcx,%rcx je 2f 1: addq 0*8(%rsi,%r8), %r10 adcq 8+0*8(%rsi,%r8), %r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 2: addq $16,%r8 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $12,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm4,%ymm4,%ymm4 
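/* The vpalignr $12/$8/$4 above finish the column half of a ChaCha20 double round by rotating the state into diagonal form; the diagonal half follows and rotates it back. */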
vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $4,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm4,%ymm4,%ymm4 cmpq %rcx,%r8 jb 1b cmpq $160,%r8 jne 2b vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 vmovdqa %ymm3,%ymm8 jmp open_avx2_tail_loop 3: cmpq $256,%rbx ja 3f vmovdqa .chacha20_consts(%rip),%ymm0 vmovdqa 64(%rbp),%ymm4 vmovdqa 96(%rbp),%ymm8 vmovdqa %ymm0,%ymm1 vmovdqa %ymm4,%ymm5 vmovdqa %ymm8,%ymm9 vmovdqa .avx2_inc(%rip),%ymm12 vpaddd 160(%rbp),%ymm12,%ymm13 vpaddd %ymm13,%ymm12,%ymm12 vmovdqa %ymm12,160(%rbp) vmovdqa %ymm13,192(%rbp) movq %rbx,128(%rbp) movq %rbx,%rcx subq $128,%rcx shrq $4,%rcx movq $10,%r8 cmpq $10,%rcx cmovgq %r8,%rcx movq %rsi,%rbx xorq %r8,%r8 1: addq 0(%rbx),%r10 adcq 8+0(%rbx),%r11 adcq $1,%r12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx addq %rax,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rbx),%rbx 2: vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $12,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm5,%ymm5,%ymm5 incq %r8 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $4,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld 
$12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm5,%ymm5,%ymm5 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol16(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpsrld $20,%ymm6,%ymm3 vpslld $12,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol8(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpslld $7,%ymm6,%ymm3 vpsrld $25,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpalignr $4,%ymm14,%ymm14,%ymm14 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $12,%ymm6,%ymm6,%ymm6 cmpq %rcx,%r8 jb 1b cmpq $10,%r8 jne 2b movq %rbx,%r8 subq %rsi,%rbx movq %rbx,%rcx movq 128(%rbp),%rbx 1: addq $16,%rcx cmpq %rbx,%rcx jg 1f addq 0(%r8),%r10 adcq 8+0(%r8),%r11 adcq $1,%r12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx addq %rax,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%r8),%r8 jmp 1b 1: vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 vpaddd 64(%rbp),%ymm5,%ymm5 vpaddd 96(%rbp),%ymm9,%ymm9 vpaddd 192(%rbp),%ymm13,%ymm13 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 vpxor 0+0(%rsi),%ymm3,%ymm3 vpxor 32+0(%rsi),%ymm1,%ymm1 vpxor 64+0(%rsi),%ymm5,%ymm5 vpxor 96+0(%rsi),%ymm9,%ymm9 vmovdqu %ymm3,0+0(%rdi) vmovdqu %ymm1,32+0(%rdi) vmovdqu %ymm5,64+0(%rdi) vmovdqu %ymm9,96+0(%rdi) vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 vmovdqa %ymm3,%ymm8 leaq 128(%rsi),%rsi leaq 128(%rdi),%rdi subq $128,%rbx jmp open_avx2_tail_loop 3: cmpq $384,%rbx ja 3f vmovdqa .chacha20_consts(%rip),%ymm0 vmovdqa 64(%rbp),%ymm4 vmovdqa 96(%rbp),%ymm8 vmovdqa %ymm0,%ymm1 vmovdqa %ymm4,%ymm5 vmovdqa %ymm8,%ymm9 vmovdqa %ymm0,%ymm2 vmovdqa %ymm4,%ymm6 vmovdqa %ymm8,%ymm10 vmovdqa .avx2_inc(%rip),%ymm12 vpaddd 160(%rbp),%ymm12,%ymm14 vpaddd %ymm14,%ymm12,%ymm13 vpaddd %ymm13,%ymm12,%ymm12 vmovdqa %ymm12,160(%rbp) vmovdqa %ymm13,192(%rbp) vmovdqa %ymm14,224(%rbp) movq %rbx,128(%rbp) movq %rbx,%rcx subq $256,%rcx shrq $4,%rcx addq $6,%rcx movq $10,%r8 cmpq $10,%rcx cmovgq %r8,%rcx movq %rsi,%rbx xorq %r8,%r8 1: addq 0(%rbx),%r10 adcq 8+0(%rbx),%r11 adcq $1,%r12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx addq %rax,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rbx),%rbx 2: vpaddd 
%ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol16(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpsrld $20,%ymm6,%ymm3 vpslld $12,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol8(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpslld $7,%ymm6,%ymm3 vpsrld $25,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpalignr $12,%ymm14,%ymm14,%ymm14 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $4,%ymm6,%ymm6,%ymm6 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm5,%ymm5,%ymm5 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $12,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm4,%ymm4,%ymm4 addq 0(%rbx),%r10 adcq 8+0(%rbx),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rbx),%rbx incq %r8 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol16(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpsrld $20,%ymm6,%ymm3 vpslld $12,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol8(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpslld $7,%ymm6,%ymm3 vpsrld $25,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpalignr $4,%ymm14,%ymm14,%ymm14 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $12,%ymm6,%ymm6,%ymm6 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm5,%ymm5,%ymm5 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 
vpxor %ymm3,%ymm4,%ymm4 vpalignr $4,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm4,%ymm4,%ymm4 cmpq %rcx,%r8 jb 1b cmpq $10,%r8 jne 2b movq %rbx,%r8 subq %rsi,%rbx movq %rbx,%rcx movq 128(%rbp),%rbx 1: addq $16,%rcx cmpq %rbx,%rcx jg 1f addq 0(%r8),%r10 adcq 8+0(%r8),%r11 adcq $1,%r12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx addq %rax,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%r8),%r8 jmp 1b 1: vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 vpaddd 64(%rbp),%ymm6,%ymm6 vpaddd 96(%rbp),%ymm10,%ymm10 vpaddd 224(%rbp),%ymm14,%ymm14 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 vpaddd 64(%rbp),%ymm5,%ymm5 vpaddd 96(%rbp),%ymm9,%ymm9 vpaddd 192(%rbp),%ymm13,%ymm13 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 vpxor 0+0(%rsi),%ymm3,%ymm3 vpxor 32+0(%rsi),%ymm2,%ymm2 vpxor 64+0(%rsi),%ymm6,%ymm6 vpxor 96+0(%rsi),%ymm10,%ymm10 vmovdqu %ymm3,0+0(%rdi) vmovdqu %ymm2,32+0(%rdi) vmovdqu %ymm6,64+0(%rdi) vmovdqu %ymm10,96+0(%rdi) vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 vpxor 0+128(%rsi),%ymm3,%ymm3 vpxor 32+128(%rsi),%ymm1,%ymm1 vpxor 64+128(%rsi),%ymm5,%ymm5 vpxor 96+128(%rsi),%ymm9,%ymm9 vmovdqu %ymm3,0+128(%rdi) vmovdqu %ymm1,32+128(%rdi) vmovdqu %ymm5,64+128(%rdi) vmovdqu %ymm9,96+128(%rdi) vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 vmovdqa %ymm3,%ymm8 leaq 256(%rsi),%rsi leaq 256(%rdi),%rdi subq $256,%rbx jmp open_avx2_tail_loop 3: vmovdqa .chacha20_consts(%rip),%ymm0 vmovdqa 64(%rbp),%ymm4 vmovdqa 96(%rbp),%ymm8 vmovdqa %ymm0,%ymm1 vmovdqa %ymm4,%ymm5 vmovdqa %ymm8,%ymm9 vmovdqa %ymm0,%ymm2 vmovdqa %ymm4,%ymm6 vmovdqa %ymm8,%ymm10 vmovdqa %ymm0,%ymm3 vmovdqa %ymm4,%ymm7 vmovdqa %ymm8,%ymm11 vmovdqa .avx2_inc(%rip),%ymm12 vpaddd 160(%rbp),%ymm12,%ymm15 vpaddd %ymm15,%ymm12,%ymm14 vpaddd %ymm14,%ymm12,%ymm13 vpaddd %ymm13,%ymm12,%ymm12 vmovdqa %ymm15,256(%rbp) vmovdqa %ymm14,224(%rbp) vmovdqa %ymm13,192(%rbp) vmovdqa %ymm12,160(%rbp) xorq %rcx,%rcx movq %rsi,%r8 1: addq 0(%r8),%r10 adcq 8+0(%r8),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%r8),%r8 2: vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor 
%ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 addq 0(%r8),%r10 adcq 8+0(%r8),%r11 adcq $1,%r12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx addq %rax,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 vpslld $32-25,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $4,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $12,%ymm15,%ymm15,%ymm15 vpalignr $4,%ymm6,%ymm6,%ymm6 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $12,%ymm14,%ymm14,%ymm14 vpalignr $4,%ymm5,%ymm5,%ymm5 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $4,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm12,%ymm12,%ymm12 vmovdqa %ymm8,128(%rbp) addq 16(%r8),%r10 adcq 8+16(%r8),%r11 adcq $1,%r12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx addq %rax,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 32(%r8),%r8 vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd 
%ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 vpslld $32-25,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $12,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $4,%ymm15,%ymm15,%ymm15 vpalignr $12,%ymm6,%ymm6,%ymm6 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $4,%ymm14,%ymm14,%ymm14 vpalignr $12,%ymm5,%ymm5,%ymm5 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $12,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm12,%ymm12,%ymm12 incq %rcx cmpq $4,%rcx jl 1b cmpq $10,%rcx jne 2b movq %rbx,%rcx subq $384,%rcx andq $-16,%rcx 1: testq %rcx,%rcx je 1f addq 0(%r8),%r10 adcq 8+0(%r8),%r11 adcq $1,%r12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx addq %rax,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%r8),%r8 subq $16,%rcx jmp 1b 1: vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 vpaddd 64(%rbp),%ymm7,%ymm7 vpaddd 96(%rbp),%ymm11,%ymm11 vpaddd 256(%rbp),%ymm15,%ymm15 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 vpaddd 64(%rbp),%ymm6,%ymm6 vpaddd 96(%rbp),%ymm10,%ymm10 vpaddd 224(%rbp),%ymm14,%ymm14 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 vpaddd 64(%rbp),%ymm5,%ymm5 vpaddd 96(%rbp),%ymm9,%ymm9 vpaddd 192(%rbp),%ymm13,%ymm13 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 vmovdqa %ymm0,128(%rbp) vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 vpxor 0+0(%rsi),%ymm0,%ymm0 vpxor 32+0(%rsi),%ymm3,%ymm3 vpxor 64+0(%rsi),%ymm7,%ymm7 vpxor 96+0(%rsi),%ymm11,%ymm11 vmovdqu %ymm0,0+0(%rdi) vmovdqu %ymm3,32+0(%rdi) vmovdqu %ymm7,64+0(%rdi) vmovdqu %ymm11,96+0(%rdi) vmovdqa 128(%rbp),%ymm0 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 
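/* vperm2i128 recombines the two 128-bit lanes into consecutive 64-byte keystream blocks; the vpxor/vmovdqu below apply them to the next 128 bytes of input. */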
vpxor 0+128(%rsi),%ymm3,%ymm3 vpxor 32+128(%rsi),%ymm2,%ymm2 vpxor 64+128(%rsi),%ymm6,%ymm6 vpxor 96+128(%rsi),%ymm10,%ymm10 vmovdqu %ymm3,0+128(%rdi) vmovdqu %ymm2,32+128(%rdi) vmovdqu %ymm6,64+128(%rdi) vmovdqu %ymm10,96+128(%rdi) vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 vpxor 0+256(%rsi),%ymm3,%ymm3 vpxor 32+256(%rsi),%ymm1,%ymm1 vpxor 64+256(%rsi),%ymm5,%ymm5 vpxor 96+256(%rsi),%ymm9,%ymm9 vmovdqu %ymm3,0+256(%rdi) vmovdqu %ymm1,32+256(%rdi) vmovdqu %ymm5,64+256(%rdi) vmovdqu %ymm9,96+256(%rdi) vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 vmovdqa %ymm3,%ymm8 leaq 384(%rsi),%rsi leaq 384(%rdi),%rdi subq $384,%rbx open_avx2_tail_loop: cmpq $32,%rbx jb open_avx2_tail subq $32,%rbx vpxor (%rsi),%ymm0,%ymm0 vmovdqu %ymm0,(%rdi) leaq 32(%rsi),%rsi leaq 32(%rdi),%rdi vmovdqa %ymm4,%ymm0 vmovdqa %ymm8,%ymm4 vmovdqa %ymm12,%ymm8 jmp open_avx2_tail_loop open_avx2_tail: cmpq $16,%rbx vmovdqa %xmm0,%xmm1 jb 1f subq $16,%rbx vpxor (%rsi),%xmm0,%xmm1 vmovdqu %xmm1,(%rdi) leaq 16(%rsi),%rsi leaq 16(%rdi),%rdi vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 vmovdqa %xmm0,%xmm1 1: vzeroupper jmp open_sse_tail_16 open_avx2_192: vmovdqa %ymm0,%ymm1 vmovdqa %ymm0,%ymm2 vmovdqa %ymm4,%ymm5 vmovdqa %ymm4,%ymm6 vmovdqa %ymm8,%ymm9 vmovdqa %ymm8,%ymm10 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 vmovdqa %ymm12,%ymm11 vmovdqa %ymm13,%ymm15 movq $10,%r10 1: vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $12,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm5,%ymm5,%ymm5 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $4,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm5,%ymm5,%ymm5 
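/* Ten passes of this double round (decq %r10 / jne 1b below) give the full 20 ChaCha20 rounds for the 192-byte-or-shorter open path. */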
decq %r10 jne 1b vpaddd %ymm2,%ymm0,%ymm0 vpaddd %ymm2,%ymm1,%ymm1 vpaddd %ymm6,%ymm4,%ymm4 vpaddd %ymm6,%ymm5,%ymm5 vpaddd %ymm10,%ymm8,%ymm8 vpaddd %ymm10,%ymm9,%ymm9 vpaddd %ymm11,%ymm12,%ymm12 vpaddd %ymm15,%ymm13,%ymm13 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 vpand .clamp(%rip),%ymm3,%ymm3 vmovdqa %ymm3,0(%rbp) vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 open_avx2_short: movq %r8,%r8 call poly_hash_ad_internal open_avx2_hash_and_xor_loop: cmpq $32,%rbx jb open_avx2_short_tail_32 subq $32,%rbx addq 0(%rsi),%r10 adcq 8+0(%rsi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 addq 16(%rsi),%r10 adcq 8+16(%rsi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpxor (%rsi),%ymm0,%ymm0 vmovdqu %ymm0,(%rdi) leaq 32(%rsi),%rsi leaq 32(%rdi),%rdi vmovdqa %ymm4,%ymm0 vmovdqa %ymm8,%ymm4 vmovdqa %ymm12,%ymm8 vmovdqa %ymm1,%ymm12 vmovdqa %ymm5,%ymm1 vmovdqa %ymm9,%ymm5 vmovdqa %ymm13,%ymm9 vmovdqa %ymm2,%ymm13 vmovdqa %ymm6,%ymm2 jmp open_avx2_hash_and_xor_loop open_avx2_short_tail_32: cmpq $16,%rbx vmovdqa %xmm0,%xmm1 jb 1f subq $16,%rbx addq 0(%rsi),%r10 adcq 8+0(%rsi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpxor (%rsi),%xmm0,%xmm3 vmovdqu %xmm3,(%rdi) leaq 16(%rsi),%rsi leaq 16(%rdi),%rdi vextracti128 $1,%ymm0,%xmm1 1: vzeroupper jmp open_sse_tail_16 open_avx2_320: vmovdqa %ymm0,%ymm1 vmovdqa %ymm0,%ymm2 vmovdqa %ymm4,%ymm5 vmovdqa %ymm4,%ymm6 vmovdqa %ymm8,%ymm9 vmovdqa %ymm8,%ymm10 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 vpaddd .avx2_inc(%rip),%ymm13,%ymm14 vmovdqa %ymm4,%ymm7 vmovdqa %ymm8,%ymm11 vmovdqa %ymm12,160(%rbp) vmovdqa %ymm13,192(%rbp) vmovdqa %ymm14,224(%rbp) movq $10,%r10 1: vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor 
%ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $12,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm5,%ymm5,%ymm5 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol16(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpsrld $20,%ymm6,%ymm3 vpslld $12,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol8(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpslld $7,%ymm6,%ymm3 vpsrld $25,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpalignr $12,%ymm14,%ymm14,%ymm14 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $4,%ymm6,%ymm6,%ymm6 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $4,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm5,%ymm5,%ymm5 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol16(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpsrld $20,%ymm6,%ymm3 vpslld $12,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol8(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpslld $7,%ymm6,%ymm3 vpsrld $25,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpalignr $4,%ymm14,%ymm14,%ymm14 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $12,%ymm6,%ymm6,%ymm6 decq %r10 jne 1b vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 vpaddd %ymm7,%ymm4,%ymm4 vpaddd %ymm7,%ymm5,%ymm5 vpaddd %ymm7,%ymm6,%ymm6 vpaddd %ymm11,%ymm8,%ymm8 vpaddd %ymm11,%ymm9,%ymm9 vpaddd %ymm11,%ymm10,%ymm10 vpaddd 160(%rbp),%ymm12,%ymm12 vpaddd 192(%rbp),%ymm13,%ymm13 vpaddd 224(%rbp),%ymm14,%ymm14 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 vpand .clamp(%rip),%ymm3,%ymm3 vmovdqa %ymm3,0(%rbp) vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 
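/* The remaining vperm2i128 shuffles leave the 320-byte path's keystream in ymm0,4,8,12,1,5,9,13,2,6, the order consumed by the open_avx2_short loop. */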
vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 jmp open_avx2_short .size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2 .type chacha20_poly1305_seal_avx2,@function .align 64 chacha20_poly1305_seal_avx2: vzeroupper vmovdqa .chacha20_consts(%rip),%ymm0 vbroadcasti128 0(%r9),%ymm4 vbroadcasti128 16(%r9),%ymm8 vbroadcasti128 32(%r9),%ymm12 vpaddd .avx2_init(%rip),%ymm12,%ymm12 cmpq $192,%rbx jbe seal_avx2_192 cmpq $320,%rbx jbe seal_avx2_320 vmovdqa %ymm0,%ymm1 vmovdqa %ymm0,%ymm2 vmovdqa %ymm0,%ymm3 vmovdqa %ymm4,%ymm5 vmovdqa %ymm4,%ymm6 vmovdqa %ymm4,%ymm7 vmovdqa %ymm4,64(%rbp) vmovdqa %ymm8,%ymm9 vmovdqa %ymm8,%ymm10 vmovdqa %ymm8,%ymm11 vmovdqa %ymm8,96(%rbp) vmovdqa %ymm12,%ymm15 vpaddd .avx2_inc(%rip),%ymm15,%ymm14 vpaddd .avx2_inc(%rip),%ymm14,%ymm13 vpaddd .avx2_inc(%rip),%ymm13,%ymm12 vmovdqa %ymm12,160(%rbp) vmovdqa %ymm13,192(%rbp) vmovdqa %ymm14,224(%rbp) vmovdqa %ymm15,256(%rbp) movq $10,%r10 1: vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 vpslld $32-25,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $4,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $12,%ymm15,%ymm15,%ymm15 vpalignr $4,%ymm6,%ymm6,%ymm6 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $12,%ymm14,%ymm14,%ymm14 vpalignr $4,%ymm5,%ymm5,%ymm5 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $4,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm12,%ymm12,%ymm12 vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 
vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 vpslld $32-25,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $12,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $4,%ymm15,%ymm15,%ymm15 vpalignr $12,%ymm6,%ymm6,%ymm6 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $4,%ymm14,%ymm14,%ymm14 vpalignr $12,%ymm5,%ymm5,%ymm5 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $12,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm12,%ymm12,%ymm12 decq %r10 jnz 1b vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 vpaddd 64(%rbp),%ymm7,%ymm7 vpaddd 96(%rbp),%ymm11,%ymm11 vpaddd 256(%rbp),%ymm15,%ymm15 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 vpaddd 64(%rbp),%ymm6,%ymm6 vpaddd 96(%rbp),%ymm10,%ymm10 vpaddd 224(%rbp),%ymm14,%ymm14 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 vpaddd 64(%rbp),%ymm5,%ymm5 vpaddd 96(%rbp),%ymm9,%ymm9 vpaddd 192(%rbp),%ymm13,%ymm13 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 vpand .clamp(%rip),%ymm15,%ymm15 vmovdqa %ymm15,0(%rbp) movq %r8,%r8 call poly_hash_ad_internal vpxor 0(%rsi),%ymm3,%ymm3 vpxor 32(%rsi),%ymm11,%ymm11 vmovdqu %ymm3,0(%rdi) vmovdqu %ymm11,32(%rdi) vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 vpxor 0+64(%rsi),%ymm15,%ymm15 vpxor 32+64(%rsi),%ymm2,%ymm2 vpxor 64+64(%rsi),%ymm6,%ymm6 vpxor 96+64(%rsi),%ymm10,%ymm10 vmovdqu %ymm15,0+64(%rdi) vmovdqu %ymm2,32+64(%rdi) vmovdqu %ymm6,64+64(%rdi) vmovdqu %ymm10,96+64(%rdi) vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 vpxor 0+192(%rsi),%ymm15,%ymm15 vpxor 32+192(%rsi),%ymm1,%ymm1 vpxor 64+192(%rsi),%ymm5,%ymm5 vpxor 96+192(%rsi),%ymm9,%ymm9 vmovdqu %ymm15,0+192(%rdi) vmovdqu %ymm1,32+192(%rdi) vmovdqu %ymm5,64+192(%rdi) vmovdqu %ymm9,96+192(%rdi) vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 vperm2i128 
$0x02,%ymm0,%ymm4,%ymm0 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 vmovdqa %ymm15,%ymm8 leaq 320(%rsi),%rsi subq $320,%rbx movq $320,%rcx cmpq $128,%rbx jbe seal_avx2_hash vpxor 0(%rsi),%ymm0,%ymm0 vpxor 32(%rsi),%ymm4,%ymm4 vpxor 64(%rsi),%ymm8,%ymm8 vpxor 96(%rsi),%ymm12,%ymm12 vmovdqu %ymm0,320(%rdi) vmovdqu %ymm4,352(%rdi) vmovdqu %ymm8,384(%rdi) vmovdqu %ymm12,416(%rdi) leaq 128(%rsi),%rsi subq $128,%rbx movq $8,%rcx movq $2,%r8 cmpq $128,%rbx jbe seal_avx2_tail_128 cmpq $256,%rbx jbe seal_avx2_tail_256 cmpq $384,%rbx jbe seal_avx2_tail_384 cmpq $512,%rbx jbe seal_avx2_tail_512 vmovdqa .chacha20_consts(%rip),%ymm0 vmovdqa 64(%rbp),%ymm4 vmovdqa 96(%rbp),%ymm8 vmovdqa %ymm0,%ymm1 vmovdqa %ymm4,%ymm5 vmovdqa %ymm8,%ymm9 vmovdqa %ymm0,%ymm2 vmovdqa %ymm4,%ymm6 vmovdqa %ymm8,%ymm10 vmovdqa %ymm0,%ymm3 vmovdqa %ymm4,%ymm7 vmovdqa %ymm8,%ymm11 vmovdqa .avx2_inc(%rip),%ymm12 vpaddd 160(%rbp),%ymm12,%ymm15 vpaddd %ymm15,%ymm12,%ymm14 vpaddd %ymm14,%ymm12,%ymm13 vpaddd %ymm13,%ymm12,%ymm12 vmovdqa %ymm15,256(%rbp) vmovdqa %ymm14,224(%rbp) vmovdqa %ymm13,192(%rbp) vmovdqa %ymm12,160(%rbp) vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 vpslld $32-25,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $4,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $12,%ymm15,%ymm15,%ymm15 vpalignr $4,%ymm6,%ymm6,%ymm6 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $12,%ymm14,%ymm14,%ymm14 vpalignr $4,%ymm5,%ymm5,%ymm5 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $4,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm12,%ymm12,%ymm12 vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor 
%ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 vpslld $32-25,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $12,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $4,%ymm15,%ymm15,%ymm15 vpalignr $12,%ymm6,%ymm6,%ymm6 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $4,%ymm14,%ymm14,%ymm14 vpalignr $12,%ymm5,%ymm5,%ymm5 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $12,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm12,%ymm12,%ymm12 vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 subq $16,%rdi movq $9,%rcx jmp 4f 1: vmovdqa .chacha20_consts(%rip),%ymm0 vmovdqa 64(%rbp),%ymm4 vmovdqa 96(%rbp),%ymm8 vmovdqa %ymm0,%ymm1 vmovdqa %ymm4,%ymm5 vmovdqa %ymm8,%ymm9 vmovdqa %ymm0,%ymm2 vmovdqa %ymm4,%ymm6 vmovdqa %ymm8,%ymm10 vmovdqa %ymm0,%ymm3 vmovdqa %ymm4,%ymm7 vmovdqa %ymm8,%ymm11 vmovdqa .avx2_inc(%rip),%ymm12 vpaddd 160(%rbp),%ymm12,%ymm15 vpaddd %ymm15,%ymm12,%ymm14 vpaddd %ymm14,%ymm12,%ymm13 vpaddd %ymm13,%ymm12,%ymm12 vmovdqa %ymm15,256(%rbp) vmovdqa %ymm14,224(%rbp) vmovdqa 
%ymm13,192(%rbp) vmovdqa %ymm12,160(%rbp) movq $10,%rcx 2: addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 addq %rax,%r15 adcq %rdx,%r9 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 4: vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 addq 16(%rdi),%r10 adcq 8+16(%rdi),%r11 adcq $1,%r12 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 vpslld $32-25,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $4,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $12,%ymm15,%ymm15,%ymm15 vpalignr $4,%ymm6,%ymm6,%ymm6 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $12,%ymm14,%ymm14,%ymm14 vpalignr $4,%ymm5,%ymm5,%ymm5 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $4,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm12,%ymm12,%ymm12 vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 addq %rax,%r15 adcq %rdx,%r9 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd 
%ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 addq 32(%rdi),%r10 adcq 8+32(%rdi),%r11 adcq $1,%r12 leaq 48(%rdi),%rdi vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 vpslld $32-25,%ymm7,%ymm7 addq %rax,%r15 adcq %rdx,%r9 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $12,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $4,%ymm15,%ymm15,%ymm15 vpalignr $12,%ymm6,%ymm6,%ymm6 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $4,%ymm14,%ymm14,%ymm14 vpalignr $12,%ymm5,%ymm5,%ymm5 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $12,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm12,%ymm12,%ymm12 decq %rcx jne 2b vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 vpaddd 64(%rbp),%ymm7,%ymm7 vpaddd 96(%rbp),%ymm11,%ymm11 vpaddd 256(%rbp),%ymm15,%ymm15 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 vpaddd 64(%rbp),%ymm6,%ymm6 vpaddd 96(%rbp),%ymm10,%ymm10 vpaddd 224(%rbp),%ymm14,%ymm14 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 vpaddd 64(%rbp),%ymm5,%ymm5 vpaddd 96(%rbp),%ymm9,%ymm9 vpaddd 192(%rbp),%ymm13,%ymm13 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 leaq 32(%rdi),%rdi vmovdqa %ymm0,128(%rbp) addq -32(%rdi),%r10 adcq 8+-32(%rdi),%r11 adcq $1,%r12 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 vpxor 0+0(%rsi),%ymm0,%ymm0 vpxor 32+0(%rsi),%ymm3,%ymm3 vpxor 64+0(%rsi),%ymm7,%ymm7 vpxor 96+0(%rsi),%ymm11,%ymm11 vmovdqu %ymm0,0+0(%rdi) vmovdqu %ymm3,32+0(%rdi) vmovdqu %ymm7,64+0(%rdi) vmovdqu %ymm11,96+0(%rdi) 
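/* Main 512-byte seal iteration: each finalized four-state batch is XORed into the input and
   written out 128 bytes at a time, while the interleaved scalar code absorbs already-written
   ciphertext from (%rdi) in 16-byte Poly1305 blocks. The repeated andq $3 / andq $-4 / shrdq $2
   sequence is the partial reduction: the part of the accumulator above 2^130 is folded back in
   multiplied by 5, since 2^130 = 5 (mod 2^130 - 5). */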
vmovdqa 128(%rbp),%ymm0 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 vpxor 0+128(%rsi),%ymm3,%ymm3 vpxor 32+128(%rsi),%ymm2,%ymm2 vpxor 64+128(%rsi),%ymm6,%ymm6 vpxor 96+128(%rsi),%ymm10,%ymm10 vmovdqu %ymm3,0+128(%rdi) vmovdqu %ymm2,32+128(%rdi) vmovdqu %ymm6,64+128(%rdi) vmovdqu %ymm10,96+128(%rdi) addq -16(%rdi),%r10 adcq 8+-16(%rdi),%r11 adcq $1,%r12 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 vpxor 0+256(%rsi),%ymm3,%ymm3 vpxor 32+256(%rsi),%ymm1,%ymm1 vpxor 64+256(%rsi),%ymm5,%ymm5 vpxor 96+256(%rsi),%ymm9,%ymm9 vmovdqu %ymm3,0+256(%rdi) vmovdqu %ymm1,32+256(%rdi) vmovdqu %ymm5,64+256(%rdi) vmovdqu %ymm9,96+256(%rdi) movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 vpxor 0+384(%rsi),%ymm3,%ymm3 vpxor 32+384(%rsi),%ymm0,%ymm0 vpxor 64+384(%rsi),%ymm4,%ymm4 vpxor 96+384(%rsi),%ymm8,%ymm8 vmovdqu %ymm3,0+384(%rdi) vmovdqu %ymm0,32+384(%rdi) vmovdqu %ymm4,64+384(%rdi) vmovdqu %ymm8,96+384(%rdi) leaq 512(%rsi),%rsi subq $512,%rbx cmpq $512,%rbx jg 1b addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 addq 16(%rdi),%r10 adcq 8+16(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq 
%r9,%r11 adcq $0,%r12 leaq 32(%rdi),%rdi movq $10,%rcx xorq %r8,%r8 cmpq $128,%rbx ja 3f seal_avx2_tail_128: vmovdqa .chacha20_consts(%rip),%ymm0 vmovdqa 64(%rbp),%ymm4 vmovdqa 96(%rbp),%ymm8 vmovdqa .avx2_inc(%rip),%ymm12 vpaddd 160(%rbp),%ymm12,%ymm12 vmovdqa %ymm12,160(%rbp) 1: addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rdi),%rdi 2: vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $12,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm4,%ymm4,%ymm4 addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $4,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm4,%ymm4,%ymm4 addq 16(%rdi),%r10 adcq 8+16(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 32(%rdi),%rdi decq %rcx jg 1b decq %r8 jge 2b vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 vmovdqa %ymm3,%ymm8 jmp seal_avx2_short_loop 3: cmpq $256,%rbx ja 3f seal_avx2_tail_256: vmovdqa 
.chacha20_consts(%rip),%ymm0 vmovdqa 64(%rbp),%ymm4 vmovdqa 96(%rbp),%ymm8 vmovdqa %ymm0,%ymm1 vmovdqa %ymm4,%ymm5 vmovdqa %ymm8,%ymm9 vmovdqa .avx2_inc(%rip),%ymm12 vpaddd 160(%rbp),%ymm12,%ymm13 vpaddd %ymm13,%ymm12,%ymm12 vmovdqa %ymm12,160(%rbp) vmovdqa %ymm13,192(%rbp) 1: addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rdi),%rdi 2: vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $12,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm5,%ymm5,%ymm5 addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $4,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm5,%ymm5,%ymm5 addq 16(%rdi),%r10 adcq 
8+16(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 32(%rdi),%rdi decq %rcx jg 1b decq %r8 jge 2b vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 vpaddd 64(%rbp),%ymm5,%ymm5 vpaddd 96(%rbp),%ymm9,%ymm9 vpaddd 192(%rbp),%ymm13,%ymm13 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 vpxor 0+0(%rsi),%ymm3,%ymm3 vpxor 32+0(%rsi),%ymm1,%ymm1 vpxor 64+0(%rsi),%ymm5,%ymm5 vpxor 96+0(%rsi),%ymm9,%ymm9 vmovdqu %ymm3,0+0(%rdi) vmovdqu %ymm1,32+0(%rdi) vmovdqu %ymm5,64+0(%rdi) vmovdqu %ymm9,96+0(%rdi) vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 vmovdqa %ymm3,%ymm8 movq $128,%rcx leaq 128(%rsi),%rsi subq $128,%rbx jmp seal_avx2_hash 3: cmpq $384,%rbx ja seal_avx2_tail_512 seal_avx2_tail_384: vmovdqa .chacha20_consts(%rip),%ymm0 vmovdqa 64(%rbp),%ymm4 vmovdqa 96(%rbp),%ymm8 vmovdqa %ymm0,%ymm1 vmovdqa %ymm4,%ymm5 vmovdqa %ymm8,%ymm9 vmovdqa %ymm0,%ymm2 vmovdqa %ymm4,%ymm6 vmovdqa %ymm8,%ymm10 vmovdqa .avx2_inc(%rip),%ymm12 vpaddd 160(%rbp),%ymm12,%ymm14 vpaddd %ymm14,%ymm12,%ymm13 vpaddd %ymm13,%ymm12,%ymm12 vmovdqa %ymm12,160(%rbp) vmovdqa %ymm13,192(%rbp) vmovdqa %ymm14,224(%rbp) 1: addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rdi),%rdi 2: vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $12,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr 
$4,%ymm5,%ymm5,%ymm5 addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol16(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpsrld $20,%ymm6,%ymm3 vpslld $12,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol8(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpslld $7,%ymm6,%ymm3 vpsrld $25,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpalignr $12,%ymm14,%ymm14,%ymm14 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $4,%ymm6,%ymm6,%ymm6 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $4,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm4,%ymm4,%ymm4 addq 16(%rdi),%r10 adcq 8+16(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm5,%ymm5,%ymm5 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol16(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpsrld $20,%ymm6,%ymm3 vpslld $12,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol8(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpslld $7,%ymm6,%ymm3 vpsrld $25,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpalignr $4,%ymm14,%ymm14,%ymm14 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $12,%ymm6,%ymm6,%ymm6 leaq 32(%rdi),%rdi decq %rcx jg 1b decq %r8 jge 2b vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 vpaddd 64(%rbp),%ymm6,%ymm6 vpaddd 96(%rbp),%ymm10,%ymm10 vpaddd 224(%rbp),%ymm14,%ymm14 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 vpaddd 64(%rbp),%ymm5,%ymm5 vpaddd 96(%rbp),%ymm9,%ymm9 vpaddd 
192(%rbp),%ymm13,%ymm13 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 vpxor 0+0(%rsi),%ymm3,%ymm3 vpxor 32+0(%rsi),%ymm2,%ymm2 vpxor 64+0(%rsi),%ymm6,%ymm6 vpxor 96+0(%rsi),%ymm10,%ymm10 vmovdqu %ymm3,0+0(%rdi) vmovdqu %ymm2,32+0(%rdi) vmovdqu %ymm6,64+0(%rdi) vmovdqu %ymm10,96+0(%rdi) vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 vpxor 0+128(%rsi),%ymm3,%ymm3 vpxor 32+128(%rsi),%ymm1,%ymm1 vpxor 64+128(%rsi),%ymm5,%ymm5 vpxor 96+128(%rsi),%ymm9,%ymm9 vmovdqu %ymm3,0+128(%rdi) vmovdqu %ymm1,32+128(%rdi) vmovdqu %ymm5,64+128(%rdi) vmovdqu %ymm9,96+128(%rdi) vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 vmovdqa %ymm3,%ymm8 movq $256,%rcx leaq 256(%rsi),%rsi subq $256,%rbx jmp seal_avx2_hash seal_avx2_tail_512: vmovdqa .chacha20_consts(%rip),%ymm0 vmovdqa 64(%rbp),%ymm4 vmovdqa 96(%rbp),%ymm8 vmovdqa %ymm0,%ymm1 vmovdqa %ymm4,%ymm5 vmovdqa %ymm8,%ymm9 vmovdqa %ymm0,%ymm2 vmovdqa %ymm4,%ymm6 vmovdqa %ymm8,%ymm10 vmovdqa %ymm0,%ymm3 vmovdqa %ymm4,%ymm7 vmovdqa %ymm8,%ymm11 vmovdqa .avx2_inc(%rip),%ymm12 vpaddd 160(%rbp),%ymm12,%ymm15 vpaddd %ymm15,%ymm12,%ymm14 vpaddd %ymm14,%ymm12,%ymm13 vpaddd %ymm13,%ymm12,%ymm12 vmovdqa %ymm15,256(%rbp) vmovdqa %ymm14,224(%rbp) vmovdqa %ymm13,192(%rbp) vmovdqa %ymm12,160(%rbp) 1: addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx addq %rax,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rdi),%rdi 2: vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 
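/* seal_avx2_tail_512: the remaining (at most 512) bytes reuse the four-state double-round loop,
   here interleaved with mulx-based Poly1305 block updates so the MAC catches up on ciphertext
   already written to (%rdi). */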
vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 movq 8+0(%rbp),%rdx mulxq %r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx vpslld $32-25,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $4,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $12,%ymm15,%ymm15,%ymm15 vpalignr $4,%ymm6,%ymm6,%ymm6 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $12,%ymm14,%ymm14,%ymm14 vpalignr $4,%ymm5,%ymm5,%ymm5 vpalignr $8,%ymm9,%ymm9,%ymm9 addq %rax,%r15 adcq %rdx,%r9 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $4,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm12,%ymm12,%ymm12 vmovdqa %ymm8,128(%rbp) vmovdqa .rol16(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $20,%ymm7,%ymm8 vpslld $32-20,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $20,%ymm6,%ymm8 vpslld $32-20,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $20,%ymm5,%ymm8 vpslld $32-20,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $20,%ymm4,%ymm8 vpslld $32-20,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 addq 16(%rdi),%r10 adcq 8+16(%rdi),%r11 adcq $1,%r12 vmovdqa .rol8(%rip),%ymm8 vpaddd %ymm7,%ymm3,%ymm3 vpaddd %ymm6,%ymm2,%ymm2 vpaddd %ymm5,%ymm1,%ymm1 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm3,%ymm15,%ymm15 vpxor %ymm2,%ymm14,%ymm14 vpxor %ymm1,%ymm13,%ymm13 vpxor %ymm0,%ymm12,%ymm12 vpshufb %ymm8,%ymm15,%ymm15 vpshufb %ymm8,%ymm14,%ymm14 vpshufb %ymm8,%ymm13,%ymm13 vpshufb %ymm8,%ymm12,%ymm12 vmovdqa 128(%rbp),%ymm8 vpaddd %ymm15,%ymm11,%ymm11 vpaddd %ymm14,%ymm10,%ymm10 vpaddd %ymm13,%ymm9,%ymm9 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm11,%ymm7,%ymm7 vpxor %ymm10,%ymm6,%ymm6 movq 0+0(%rbp),%rdx movq %rdx,%r15 mulxq %r10,%r13,%r14 mulxq %r11,%rax,%rdx imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 vpxor %ymm9,%ymm5,%ymm5 vpxor %ymm8,%ymm4,%ymm4 vmovdqa %ymm8,128(%rbp) vpsrld $25,%ymm7,%ymm8 vpslld $32-25,%ymm7,%ymm7 vpxor %ymm8,%ymm7,%ymm7 vpsrld $25,%ymm6,%ymm8 vpslld $32-25,%ymm6,%ymm6 vpxor %ymm8,%ymm6,%ymm6 vpsrld $25,%ymm5,%ymm8 vpslld $32-25,%ymm5,%ymm5 vpxor %ymm8,%ymm5,%ymm5 vpsrld $25,%ymm4,%ymm8 vpslld $32-25,%ymm4,%ymm4 vpxor %ymm8,%ymm4,%ymm4 vmovdqa 128(%rbp),%ymm8 vpalignr $12,%ymm7,%ymm7,%ymm7 vpalignr $8,%ymm11,%ymm11,%ymm11 vpalignr $4,%ymm15,%ymm15,%ymm15 vpalignr $12,%ymm6,%ymm6,%ymm6 movq 8+0(%rbp),%rdx mulxq 
%r10,%r10,%rax addq %r10,%r14 mulxq %r11,%r11,%r9 adcq %r11,%r15 adcq $0,%r9 imul %r12,%rdx vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $4,%ymm14,%ymm14,%ymm14 vpalignr $12,%ymm5,%ymm5,%ymm5 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $12,%ymm4,%ymm4,%ymm4 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm12,%ymm12,%ymm12 addq %rax,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 32(%rdi),%rdi decq %rcx jg 1b decq %r8 jge 2b vpaddd .chacha20_consts(%rip),%ymm3,%ymm3 vpaddd 64(%rbp),%ymm7,%ymm7 vpaddd 96(%rbp),%ymm11,%ymm11 vpaddd 256(%rbp),%ymm15,%ymm15 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 vpaddd 64(%rbp),%ymm6,%ymm6 vpaddd 96(%rbp),%ymm10,%ymm10 vpaddd 224(%rbp),%ymm14,%ymm14 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 vpaddd 64(%rbp),%ymm5,%ymm5 vpaddd 96(%rbp),%ymm9,%ymm9 vpaddd 192(%rbp),%ymm13,%ymm13 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd 64(%rbp),%ymm4,%ymm4 vpaddd 96(%rbp),%ymm8,%ymm8 vpaddd 160(%rbp),%ymm12,%ymm12 vmovdqa %ymm0,128(%rbp) vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 vpxor 0+0(%rsi),%ymm0,%ymm0 vpxor 32+0(%rsi),%ymm3,%ymm3 vpxor 64+0(%rsi),%ymm7,%ymm7 vpxor 96+0(%rsi),%ymm11,%ymm11 vmovdqu %ymm0,0+0(%rdi) vmovdqu %ymm3,32+0(%rdi) vmovdqu %ymm7,64+0(%rdi) vmovdqu %ymm11,96+0(%rdi) vmovdqa 128(%rbp),%ymm0 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 vpxor 0+128(%rsi),%ymm3,%ymm3 vpxor 32+128(%rsi),%ymm2,%ymm2 vpxor 64+128(%rsi),%ymm6,%ymm6 vpxor 96+128(%rsi),%ymm10,%ymm10 vmovdqu %ymm3,0+128(%rdi) vmovdqu %ymm2,32+128(%rdi) vmovdqu %ymm6,64+128(%rdi) vmovdqu %ymm10,96+128(%rdi) vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 vpxor 0+256(%rsi),%ymm3,%ymm3 vpxor 32+256(%rsi),%ymm1,%ymm1 vpxor 64+256(%rsi),%ymm5,%ymm5 vpxor 96+256(%rsi),%ymm9,%ymm9 vmovdqu %ymm3,0+256(%rdi) vmovdqu %ymm1,32+256(%rdi) vmovdqu %ymm5,64+256(%rdi) vmovdqu %ymm9,96+256(%rdi) vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 vmovdqa %ymm3,%ymm8 movq $384,%rcx leaq 384(%rsi),%rsi subq $384,%rbx jmp seal_avx2_hash seal_avx2_320: vmovdqa %ymm0,%ymm1 vmovdqa %ymm0,%ymm2 vmovdqa %ymm4,%ymm5 vmovdqa %ymm4,%ymm6 vmovdqa %ymm8,%ymm9 vmovdqa %ymm8,%ymm10 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 vpaddd .avx2_inc(%rip),%ymm13,%ymm14 vmovdqa %ymm4,%ymm7 vmovdqa %ymm8,%ymm11 vmovdqa %ymm12,160(%rbp) vmovdqa %ymm13,192(%rbp) vmovdqa %ymm14,224(%rbp) movq $10,%r10 1: vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $12,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld 
$20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm5,%ymm5,%ymm5 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol16(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpsrld $20,%ymm6,%ymm3 vpslld $12,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol8(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpslld $7,%ymm6,%ymm3 vpsrld $25,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpalignr $12,%ymm14,%ymm14,%ymm14 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $4,%ymm6,%ymm6,%ymm6 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $4,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm5,%ymm5,%ymm5 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol16(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpsrld $20,%ymm6,%ymm3 vpslld $12,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpaddd %ymm6,%ymm2,%ymm2 vpxor %ymm2,%ymm14,%ymm14 vpshufb .rol8(%rip),%ymm14,%ymm14 vpaddd %ymm14,%ymm10,%ymm10 vpxor %ymm10,%ymm6,%ymm6 vpslld $7,%ymm6,%ymm3 vpsrld $25,%ymm6,%ymm6 vpxor %ymm3,%ymm6,%ymm6 vpalignr $4,%ymm14,%ymm14,%ymm14 vpalignr $8,%ymm10,%ymm10,%ymm10 vpalignr $12,%ymm6,%ymm6,%ymm6 decq %r10 jne 1b vpaddd .chacha20_consts(%rip),%ymm0,%ymm0 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2 vpaddd %ymm7,%ymm4,%ymm4 vpaddd %ymm7,%ymm5,%ymm5 vpaddd %ymm7,%ymm6,%ymm6 vpaddd %ymm11,%ymm8,%ymm8 vpaddd %ymm11,%ymm9,%ymm9 vpaddd %ymm11,%ymm10,%ymm10 vpaddd 160(%rbp),%ymm12,%ymm12 vpaddd 192(%rbp),%ymm13,%ymm13 vpaddd 224(%rbp),%ymm14,%ymm14 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 vpand .clamp(%rip),%ymm3,%ymm3 vmovdqa %ymm3,0(%rbp) vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 jmp seal_avx2_short seal_avx2_192: vmovdqa %ymm0,%ymm1 vmovdqa %ymm0,%ymm2 vmovdqa %ymm4,%ymm5 vmovdqa %ymm4,%ymm6 vmovdqa %ymm8,%ymm9 vmovdqa %ymm8,%ymm10 vpaddd .avx2_inc(%rip),%ymm12,%ymm13 vmovdqa %ymm12,%ymm11 vmovdqa %ymm13,%ymm15 movq $10,%r10 1: vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb 
.rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $12,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $4,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $12,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $4,%ymm5,%ymm5,%ymm5 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol16(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpsrld $20,%ymm4,%ymm3 vpslld $12,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpaddd %ymm4,%ymm0,%ymm0 vpxor %ymm0,%ymm12,%ymm12 vpshufb .rol8(%rip),%ymm12,%ymm12 vpaddd %ymm12,%ymm8,%ymm8 vpxor %ymm8,%ymm4,%ymm4 vpslld $7,%ymm4,%ymm3 vpsrld $25,%ymm4,%ymm4 vpxor %ymm3,%ymm4,%ymm4 vpalignr $4,%ymm12,%ymm12,%ymm12 vpalignr $8,%ymm8,%ymm8,%ymm8 vpalignr $12,%ymm4,%ymm4,%ymm4 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol16(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpsrld $20,%ymm5,%ymm3 vpslld $12,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpaddd %ymm5,%ymm1,%ymm1 vpxor %ymm1,%ymm13,%ymm13 vpshufb .rol8(%rip),%ymm13,%ymm13 vpaddd %ymm13,%ymm9,%ymm9 vpxor %ymm9,%ymm5,%ymm5 vpslld $7,%ymm5,%ymm3 vpsrld $25,%ymm5,%ymm5 vpxor %ymm3,%ymm5,%ymm5 vpalignr $4,%ymm13,%ymm13,%ymm13 vpalignr $8,%ymm9,%ymm9,%ymm9 vpalignr $12,%ymm5,%ymm5,%ymm5 decq %r10 jne 1b vpaddd %ymm2,%ymm0,%ymm0 vpaddd %ymm2,%ymm1,%ymm1 vpaddd %ymm6,%ymm4,%ymm4 vpaddd %ymm6,%ymm5,%ymm5 vpaddd %ymm10,%ymm8,%ymm8 vpaddd %ymm10,%ymm9,%ymm9 vpaddd %ymm11,%ymm12,%ymm12 vpaddd %ymm15,%ymm13,%ymm13 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 vpand .clamp(%rip),%ymm3,%ymm3 vmovdqa %ymm3,0(%rbp) vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 seal_avx2_short: movq %r8,%r8 call poly_hash_ad_internal xorq %rcx,%rcx seal_avx2_hash: cmpq $16,%rcx jb seal_avx2_short_loop addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 subq $16,%rcx addq $16,%rdi jmp seal_avx2_hash seal_avx2_short_loop: cmpq $32,%rbx jb seal_avx2_short_tail subq $32,%rbx vpxor (%rsi),%ymm0,%ymm0 vmovdqu %ymm0,(%rdi) leaq 32(%rsi),%rsi addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq 
%r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 addq 16(%rdi),%r10 adcq 8+16(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 32(%rdi),%rdi vmovdqa %ymm4,%ymm0 vmovdqa %ymm8,%ymm4 vmovdqa %ymm12,%ymm8 vmovdqa %ymm1,%ymm12 vmovdqa %ymm5,%ymm1 vmovdqa %ymm9,%ymm5 vmovdqa %ymm13,%ymm9 vmovdqa %ymm2,%ymm13 vmovdqa %ymm6,%ymm2 jmp seal_avx2_short_loop seal_avx2_short_tail: cmpq $16,%rbx jb 1f subq $16,%rbx vpxor (%rsi),%xmm0,%xmm3 vmovdqu %xmm3,(%rdi) leaq 16(%rsi),%rsi addq 0(%rdi),%r10 adcq 8+0(%rdi),%r11 adcq $1,%r12 movq 0+0(%rbp),%rax movq %rax,%r15 mulq %r10 movq %rax,%r13 movq %rdx,%r14 movq 0+0(%rbp),%rax mulq %r11 imul %r12,%r15 addq %rax,%r14 adcq %rdx,%r15 movq 8+0(%rbp),%rax movq %rax,%r9 mulq %r10 addq %rax,%r14 adcq $0,%rdx movq %rdx,%r10 movq 8+0(%rbp),%rax mulq %r11 addq %rax,%r15 adcq $0,%rdx imul %r12,%r9 addq %r10,%r15 adcq %rdx,%r9 movq %r13,%r10 movq %r14,%r11 movq %r15,%r12 andq $3,%r12 movq %r15,%r13 andq $-4,%r13 movq %r9,%r14 shrdq $2,%r9,%r15 shrq $2,%r9 addq %r13,%r10 adcq %r14,%r11 adcq $0,%r12 addq %r15,%r10 adcq %r9,%r11 adcq $0,%r12 leaq 16(%rdi),%rdi vextracti128 $1,%ymm0,%xmm0 1: vzeroupper jmp seal_sse_tail_16 .cfi_endproc #endif