author    Robert Sloan <varomodt@google.com>    2019-03-27 13:43:08 -0700
committer android-build-merger <android-build-merger@google.com>    2019-03-27 13:43:08 -0700
commit    62208c3eba92db0919b9b96e45d9ccaab45eafce (patch)
tree      626e5760d024ba621183d506e78f0dcda4792302
parent    bdfba2a0b5cfa78c35c71b35bd385a9acfc3ec14 (diff)
parent    d434976fb7e05ec9c76227d7131f403bc8085365 (diff)
download  boringssl-62208c3eba92db0919b9b96e45d9ccaab45eafce.tar.gz

external/boringssl: Sync to df11bed9ee05141b54da7b88cc5b7960ca858164. am: 59e995095f
am: d434976fb7

Change-Id: I6c39e74c67ae7b8e09376d558b25f1b358632e03
-rw-r--r--  BORINGSSL_REVISION                                 2
-rw-r--r--  eureka.mk                                          1
-rw-r--r--  linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S   1622
-rw-r--r--  linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S    249
-rw-r--r--  mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S     1609
-rw-r--r--  mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S      249
-rw-r--r--  sources.bp                                         1
-rw-r--r--  sources.mk                                         1
-rw-r--r--  src/crypto/asn1/a_int.c                           79
-rw-r--r--  src/crypto/asn1/asn1_lib.c                         6
-rw-r--r--  src/crypto/cipher_extra/e_aesccm.c                 3
-rw-r--r--  src/crypto/cipher_extra/e_aesctrhmac.c             4
-rw-r--r--  src/crypto/cipher_extra/e_aesgcmsiv.c              5
-rw-r--r--  src/crypto/fipsmodule/CMakeLists.txt               2
-rw-r--r--  src/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl   3227
-rw-r--r--  src/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl    299
-rw-r--r--  src/crypto/fipsmodule/aes/internal.h               8
-rw-r--r--  src/crypto/fipsmodule/bn/ctx.c                   249
-rw-r--r--  src/crypto/fipsmodule/bn/exponentiation.c          7
-rw-r--r--  src/crypto/fipsmodule/cipher/e_aes.c              14
-rw-r--r--  src/crypto/fipsmodule/cipher/internal.h            4
-rw-r--r--  src/crypto/fipsmodule/rand/ctrdrbg.c              10
-rw-r--r--  src/crypto/impl_dispatch_test.cc                   8
-rw-r--r--  src/include/openssl/asn1.h                         1
-rw-r--r--  src/tool/speed.cc                                  4
-rw-r--r--  win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm   1777
-rw-r--r--  win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm    293
27 files changed, 1220 insertions, 8514 deletions
diff --git a/BORINGSSL_REVISION b/BORINGSSL_REVISION
index 42ad2f07..6b8d547c 100644
--- a/BORINGSSL_REVISION
+++ b/BORINGSSL_REVISION
@@ -1 +1 @@
-fdb48f98612e934eab339b4871484b1c987553e2
+df11bed9ee05141b54da7b88cc5b7960ca858164
diff --git a/eureka.mk b/eureka.mk
index 43de1776..9824521b 100644
--- a/eureka.mk
+++ b/eureka.mk
@@ -348,7 +348,6 @@ linux_x86_64_sources := \
linux-x86_64/crypto/fipsmodule/aes-x86_64.S\
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S\
linux-x86_64/crypto/fipsmodule/aesni-x86_64.S\
- linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S\
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S\
linux-x86_64/crypto/fipsmodule/ghash-x86_64.S\
linux-x86_64/crypto/fipsmodule/md5-x86_64.S\
diff --git a/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S b/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S
deleted file mode 100644
index 5437762f..00000000
--- a/linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S
+++ /dev/null
@@ -1,1622 +0,0 @@
-# This file is generated from a similarly-named Perl script in the BoringSSL
-# source tree. Do not edit by hand.
-
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-#endif
-
-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-.text
-
-.type _bsaes_encrypt8,@function
-.align 64
-_bsaes_encrypt8:
-.cfi_startproc
- leaq .LBS0(%rip),%r11
-
- movdqa (%rax),%xmm8
- leaq 16(%rax),%rax
- movdqa 80(%r11),%xmm7
- pxor %xmm8,%xmm15
- pxor %xmm8,%xmm0
- pxor %xmm8,%xmm1
- pxor %xmm8,%xmm2
-.byte 102,68,15,56,0,255
-.byte 102,15,56,0,199
- pxor %xmm8,%xmm3
- pxor %xmm8,%xmm4
-.byte 102,15,56,0,207
-.byte 102,15,56,0,215
- pxor %xmm8,%xmm5
- pxor %xmm8,%xmm6
-.byte 102,15,56,0,223
-.byte 102,15,56,0,231
-.byte 102,15,56,0,239
-.byte 102,15,56,0,247
-_bsaes_encrypt8_bitslice:
- movdqa 0(%r11),%xmm7
- movdqa 16(%r11),%xmm8
- movdqa %xmm5,%xmm9
- psrlq $1,%xmm5
- movdqa %xmm3,%xmm10
- psrlq $1,%xmm3
- pxor %xmm6,%xmm5
- pxor %xmm4,%xmm3
- pand %xmm7,%xmm5
- pand %xmm7,%xmm3
- pxor %xmm5,%xmm6
- psllq $1,%xmm5
- pxor %xmm3,%xmm4
- psllq $1,%xmm3
- pxor %xmm9,%xmm5
- pxor %xmm10,%xmm3
- movdqa %xmm1,%xmm9
- psrlq $1,%xmm1
- movdqa %xmm15,%xmm10
- psrlq $1,%xmm15
- pxor %xmm2,%xmm1
- pxor %xmm0,%xmm15
- pand %xmm7,%xmm1
- pand %xmm7,%xmm15
- pxor %xmm1,%xmm2
- psllq $1,%xmm1
- pxor %xmm15,%xmm0
- psllq $1,%xmm15
- pxor %xmm9,%xmm1
- pxor %xmm10,%xmm15
- movdqa 32(%r11),%xmm7
- movdqa %xmm4,%xmm9
- psrlq $2,%xmm4
- movdqa %xmm3,%xmm10
- psrlq $2,%xmm3
- pxor %xmm6,%xmm4
- pxor %xmm5,%xmm3
- pand %xmm8,%xmm4
- pand %xmm8,%xmm3
- pxor %xmm4,%xmm6
- psllq $2,%xmm4
- pxor %xmm3,%xmm5
- psllq $2,%xmm3
- pxor %xmm9,%xmm4
- pxor %xmm10,%xmm3
- movdqa %xmm0,%xmm9
- psrlq $2,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $2,%xmm15
- pxor %xmm2,%xmm0
- pxor %xmm1,%xmm15
- pand %xmm8,%xmm0
- pand %xmm8,%xmm15
- pxor %xmm0,%xmm2
- psllq $2,%xmm0
- pxor %xmm15,%xmm1
- psllq $2,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa %xmm2,%xmm9
- psrlq $4,%xmm2
- movdqa %xmm1,%xmm10
- psrlq $4,%xmm1
- pxor %xmm6,%xmm2
- pxor %xmm5,%xmm1
- pand %xmm7,%xmm2
- pand %xmm7,%xmm1
- pxor %xmm2,%xmm6
- psllq $4,%xmm2
- pxor %xmm1,%xmm5
- psllq $4,%xmm1
- pxor %xmm9,%xmm2
- pxor %xmm10,%xmm1
- movdqa %xmm0,%xmm9
- psrlq $4,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $4,%xmm15
- pxor %xmm4,%xmm0
- pxor %xmm3,%xmm15
- pand %xmm7,%xmm0
- pand %xmm7,%xmm15
- pxor %xmm0,%xmm4
- psllq $4,%xmm0
- pxor %xmm15,%xmm3
- psllq $4,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- decl %r10d
- jmp .Lenc_sbox
-.align 16
-.Lenc_loop:
- pxor 0(%rax),%xmm15
- pxor 16(%rax),%xmm0
- pxor 32(%rax),%xmm1
- pxor 48(%rax),%xmm2
-.byte 102,68,15,56,0,255
-.byte 102,15,56,0,199
- pxor 64(%rax),%xmm3
- pxor 80(%rax),%xmm4
-.byte 102,15,56,0,207
-.byte 102,15,56,0,215
- pxor 96(%rax),%xmm5
- pxor 112(%rax),%xmm6
-.byte 102,15,56,0,223
-.byte 102,15,56,0,231
-.byte 102,15,56,0,239
-.byte 102,15,56,0,247
- leaq 128(%rax),%rax
-.Lenc_sbox:
- pxor %xmm5,%xmm4
- pxor %xmm0,%xmm1
- pxor %xmm15,%xmm2
- pxor %xmm1,%xmm5
- pxor %xmm15,%xmm4
-
- pxor %xmm2,%xmm5
- pxor %xmm6,%xmm2
- pxor %xmm4,%xmm6
- pxor %xmm3,%xmm2
- pxor %xmm4,%xmm3
- pxor %xmm0,%xmm2
-
- pxor %xmm6,%xmm1
- pxor %xmm4,%xmm0
- movdqa %xmm6,%xmm10
- movdqa %xmm0,%xmm9
- movdqa %xmm4,%xmm8
- movdqa %xmm1,%xmm12
- movdqa %xmm5,%xmm11
-
- pxor %xmm3,%xmm10
- pxor %xmm1,%xmm9
- pxor %xmm2,%xmm8
- movdqa %xmm10,%xmm13
- pxor %xmm3,%xmm12
- movdqa %xmm9,%xmm7
- pxor %xmm15,%xmm11
- movdqa %xmm10,%xmm14
-
- por %xmm8,%xmm9
- por %xmm11,%xmm10
- pxor %xmm7,%xmm14
- pand %xmm11,%xmm13
- pxor %xmm8,%xmm11
- pand %xmm8,%xmm7
- pand %xmm11,%xmm14
- movdqa %xmm2,%xmm11
- pxor %xmm15,%xmm11
- pand %xmm11,%xmm12
- pxor %xmm12,%xmm10
- pxor %xmm12,%xmm9
- movdqa %xmm6,%xmm12
- movdqa %xmm4,%xmm11
- pxor %xmm0,%xmm12
- pxor %xmm5,%xmm11
- movdqa %xmm12,%xmm8
- pand %xmm11,%xmm12
- por %xmm11,%xmm8
- pxor %xmm12,%xmm7
- pxor %xmm14,%xmm10
- pxor %xmm13,%xmm9
- pxor %xmm14,%xmm8
- movdqa %xmm1,%xmm11
- pxor %xmm13,%xmm7
- movdqa %xmm3,%xmm12
- pxor %xmm13,%xmm8
- movdqa %xmm0,%xmm13
- pand %xmm2,%xmm11
- movdqa %xmm6,%xmm14
- pand %xmm15,%xmm12
- pand %xmm4,%xmm13
- por %xmm5,%xmm14
- pxor %xmm11,%xmm10
- pxor %xmm12,%xmm9
- pxor %xmm13,%xmm8
- pxor %xmm14,%xmm7
-
-
-
-
-
- movdqa %xmm10,%xmm11
- pand %xmm8,%xmm10
- pxor %xmm9,%xmm11
-
- movdqa %xmm7,%xmm13
- movdqa %xmm11,%xmm14
- pxor %xmm10,%xmm13
- pand %xmm13,%xmm14
-
- movdqa %xmm8,%xmm12
- pxor %xmm9,%xmm14
- pxor %xmm7,%xmm12
-
- pxor %xmm9,%xmm10
-
- pand %xmm10,%xmm12
-
- movdqa %xmm13,%xmm9
- pxor %xmm7,%xmm12
-
- pxor %xmm12,%xmm9
- pxor %xmm12,%xmm8
-
- pand %xmm7,%xmm9
-
- pxor %xmm9,%xmm13
- pxor %xmm9,%xmm8
-
- pand %xmm14,%xmm13
-
- pxor %xmm11,%xmm13
- movdqa %xmm5,%xmm11
- movdqa %xmm4,%xmm7
- movdqa %xmm14,%xmm9
- pxor %xmm13,%xmm9
- pand %xmm5,%xmm9
- pxor %xmm4,%xmm5
- pand %xmm14,%xmm4
- pand %xmm13,%xmm5
- pxor %xmm4,%xmm5
- pxor %xmm9,%xmm4
- pxor %xmm15,%xmm11
- pxor %xmm2,%xmm7
- pxor %xmm12,%xmm14
- pxor %xmm8,%xmm13
- movdqa %xmm14,%xmm10
- movdqa %xmm12,%xmm9
- pxor %xmm13,%xmm10
- pxor %xmm8,%xmm9
- pand %xmm11,%xmm10
- pand %xmm15,%xmm9
- pxor %xmm7,%xmm11
- pxor %xmm2,%xmm15
- pand %xmm14,%xmm7
- pand %xmm12,%xmm2
- pand %xmm13,%xmm11
- pand %xmm8,%xmm15
- pxor %xmm11,%xmm7
- pxor %xmm2,%xmm15
- pxor %xmm10,%xmm11
- pxor %xmm9,%xmm2
- pxor %xmm11,%xmm5
- pxor %xmm11,%xmm15
- pxor %xmm7,%xmm4
- pxor %xmm7,%xmm2
-
- movdqa %xmm6,%xmm11
- movdqa %xmm0,%xmm7
- pxor %xmm3,%xmm11
- pxor %xmm1,%xmm7
- movdqa %xmm14,%xmm10
- movdqa %xmm12,%xmm9
- pxor %xmm13,%xmm10
- pxor %xmm8,%xmm9
- pand %xmm11,%xmm10
- pand %xmm3,%xmm9
- pxor %xmm7,%xmm11
- pxor %xmm1,%xmm3
- pand %xmm14,%xmm7
- pand %xmm12,%xmm1
- pand %xmm13,%xmm11
- pand %xmm8,%xmm3
- pxor %xmm11,%xmm7
- pxor %xmm1,%xmm3
- pxor %xmm10,%xmm11
- pxor %xmm9,%xmm1
- pxor %xmm12,%xmm14
- pxor %xmm8,%xmm13
- movdqa %xmm14,%xmm10
- pxor %xmm13,%xmm10
- pand %xmm6,%xmm10
- pxor %xmm0,%xmm6
- pand %xmm14,%xmm0
- pand %xmm13,%xmm6
- pxor %xmm0,%xmm6
- pxor %xmm10,%xmm0
- pxor %xmm11,%xmm6
- pxor %xmm11,%xmm3
- pxor %xmm7,%xmm0
- pxor %xmm7,%xmm1
- pxor %xmm15,%xmm6
- pxor %xmm5,%xmm0
- pxor %xmm6,%xmm3
- pxor %xmm15,%xmm5
- pxor %xmm0,%xmm15
-
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- pxor %xmm2,%xmm1
- pxor %xmm4,%xmm2
- pxor %xmm4,%xmm3
-
- pxor %xmm2,%xmm5
- decl %r10d
- jl .Lenc_done
- pshufd $0x93,%xmm15,%xmm7
- pshufd $0x93,%xmm0,%xmm8
- pxor %xmm7,%xmm15
- pshufd $0x93,%xmm3,%xmm9
- pxor %xmm8,%xmm0
- pshufd $0x93,%xmm5,%xmm10
- pxor %xmm9,%xmm3
- pshufd $0x93,%xmm2,%xmm11
- pxor %xmm10,%xmm5
- pshufd $0x93,%xmm6,%xmm12
- pxor %xmm11,%xmm2
- pshufd $0x93,%xmm1,%xmm13
- pxor %xmm12,%xmm6
- pshufd $0x93,%xmm4,%xmm14
- pxor %xmm13,%xmm1
- pxor %xmm14,%xmm4
-
- pxor %xmm15,%xmm8
- pxor %xmm4,%xmm7
- pxor %xmm4,%xmm8
- pshufd $0x4E,%xmm15,%xmm15
- pxor %xmm0,%xmm9
- pshufd $0x4E,%xmm0,%xmm0
- pxor %xmm2,%xmm12
- pxor %xmm7,%xmm15
- pxor %xmm6,%xmm13
- pxor %xmm8,%xmm0
- pxor %xmm5,%xmm11
- pshufd $0x4E,%xmm2,%xmm7
- pxor %xmm1,%xmm14
- pshufd $0x4E,%xmm6,%xmm8
- pxor %xmm3,%xmm10
- pshufd $0x4E,%xmm5,%xmm2
- pxor %xmm4,%xmm10
- pshufd $0x4E,%xmm4,%xmm6
- pxor %xmm4,%xmm11
- pshufd $0x4E,%xmm1,%xmm5
- pxor %xmm11,%xmm7
- pshufd $0x4E,%xmm3,%xmm1
- pxor %xmm12,%xmm8
- pxor %xmm10,%xmm2
- pxor %xmm14,%xmm6
- pxor %xmm13,%xmm5
- movdqa %xmm7,%xmm3
- pxor %xmm9,%xmm1
- movdqa %xmm8,%xmm4
- movdqa 48(%r11),%xmm7
- jnz .Lenc_loop
- movdqa 64(%r11),%xmm7
- jmp .Lenc_loop
-.align 16
-.Lenc_done:
- movdqa 0(%r11),%xmm7
- movdqa 16(%r11),%xmm8
- movdqa %xmm1,%xmm9
- psrlq $1,%xmm1
- movdqa %xmm2,%xmm10
- psrlq $1,%xmm2
- pxor %xmm4,%xmm1
- pxor %xmm6,%xmm2
- pand %xmm7,%xmm1
- pand %xmm7,%xmm2
- pxor %xmm1,%xmm4
- psllq $1,%xmm1
- pxor %xmm2,%xmm6
- psllq $1,%xmm2
- pxor %xmm9,%xmm1
- pxor %xmm10,%xmm2
- movdqa %xmm3,%xmm9
- psrlq $1,%xmm3
- movdqa %xmm15,%xmm10
- psrlq $1,%xmm15
- pxor %xmm5,%xmm3
- pxor %xmm0,%xmm15
- pand %xmm7,%xmm3
- pand %xmm7,%xmm15
- pxor %xmm3,%xmm5
- psllq $1,%xmm3
- pxor %xmm15,%xmm0
- psllq $1,%xmm15
- pxor %xmm9,%xmm3
- pxor %xmm10,%xmm15
- movdqa 32(%r11),%xmm7
- movdqa %xmm6,%xmm9
- psrlq $2,%xmm6
- movdqa %xmm2,%xmm10
- psrlq $2,%xmm2
- pxor %xmm4,%xmm6
- pxor %xmm1,%xmm2
- pand %xmm8,%xmm6
- pand %xmm8,%xmm2
- pxor %xmm6,%xmm4
- psllq $2,%xmm6
- pxor %xmm2,%xmm1
- psllq $2,%xmm2
- pxor %xmm9,%xmm6
- pxor %xmm10,%xmm2
- movdqa %xmm0,%xmm9
- psrlq $2,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $2,%xmm15
- pxor %xmm5,%xmm0
- pxor %xmm3,%xmm15
- pand %xmm8,%xmm0
- pand %xmm8,%xmm15
- pxor %xmm0,%xmm5
- psllq $2,%xmm0
- pxor %xmm15,%xmm3
- psllq $2,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa %xmm5,%xmm9
- psrlq $4,%xmm5
- movdqa %xmm3,%xmm10
- psrlq $4,%xmm3
- pxor %xmm4,%xmm5
- pxor %xmm1,%xmm3
- pand %xmm7,%xmm5
- pand %xmm7,%xmm3
- pxor %xmm5,%xmm4
- psllq $4,%xmm5
- pxor %xmm3,%xmm1
- psllq $4,%xmm3
- pxor %xmm9,%xmm5
- pxor %xmm10,%xmm3
- movdqa %xmm0,%xmm9
- psrlq $4,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $4,%xmm15
- pxor %xmm6,%xmm0
- pxor %xmm2,%xmm15
- pand %xmm7,%xmm0
- pand %xmm7,%xmm15
- pxor %xmm0,%xmm6
- psllq $4,%xmm0
- pxor %xmm15,%xmm2
- psllq $4,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa (%rax),%xmm7
- pxor %xmm7,%xmm3
- pxor %xmm7,%xmm5
- pxor %xmm7,%xmm2
- pxor %xmm7,%xmm6
- pxor %xmm7,%xmm1
- pxor %xmm7,%xmm4
- pxor %xmm7,%xmm15
- pxor %xmm7,%xmm0
- .byte 0xf3,0xc3
-.cfi_endproc
-.size _bsaes_encrypt8,.-_bsaes_encrypt8
-
-.type _bsaes_decrypt8,@function
-.align 64
-_bsaes_decrypt8:
-.cfi_startproc
- leaq .LBS0(%rip),%r11
-
- movdqa (%rax),%xmm8
- leaq 16(%rax),%rax
- movdqa -48(%r11),%xmm7
- pxor %xmm8,%xmm15
- pxor %xmm8,%xmm0
- pxor %xmm8,%xmm1
- pxor %xmm8,%xmm2
-.byte 102,68,15,56,0,255
-.byte 102,15,56,0,199
- pxor %xmm8,%xmm3
- pxor %xmm8,%xmm4
-.byte 102,15,56,0,207
-.byte 102,15,56,0,215
- pxor %xmm8,%xmm5
- pxor %xmm8,%xmm6
-.byte 102,15,56,0,223
-.byte 102,15,56,0,231
-.byte 102,15,56,0,239
-.byte 102,15,56,0,247
- movdqa 0(%r11),%xmm7
- movdqa 16(%r11),%xmm8
- movdqa %xmm5,%xmm9
- psrlq $1,%xmm5
- movdqa %xmm3,%xmm10
- psrlq $1,%xmm3
- pxor %xmm6,%xmm5
- pxor %xmm4,%xmm3
- pand %xmm7,%xmm5
- pand %xmm7,%xmm3
- pxor %xmm5,%xmm6
- psllq $1,%xmm5
- pxor %xmm3,%xmm4
- psllq $1,%xmm3
- pxor %xmm9,%xmm5
- pxor %xmm10,%xmm3
- movdqa %xmm1,%xmm9
- psrlq $1,%xmm1
- movdqa %xmm15,%xmm10
- psrlq $1,%xmm15
- pxor %xmm2,%xmm1
- pxor %xmm0,%xmm15
- pand %xmm7,%xmm1
- pand %xmm7,%xmm15
- pxor %xmm1,%xmm2
- psllq $1,%xmm1
- pxor %xmm15,%xmm0
- psllq $1,%xmm15
- pxor %xmm9,%xmm1
- pxor %xmm10,%xmm15
- movdqa 32(%r11),%xmm7
- movdqa %xmm4,%xmm9
- psrlq $2,%xmm4
- movdqa %xmm3,%xmm10
- psrlq $2,%xmm3
- pxor %xmm6,%xmm4
- pxor %xmm5,%xmm3
- pand %xmm8,%xmm4
- pand %xmm8,%xmm3
- pxor %xmm4,%xmm6
- psllq $2,%xmm4
- pxor %xmm3,%xmm5
- psllq $2,%xmm3
- pxor %xmm9,%xmm4
- pxor %xmm10,%xmm3
- movdqa %xmm0,%xmm9
- psrlq $2,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $2,%xmm15
- pxor %xmm2,%xmm0
- pxor %xmm1,%xmm15
- pand %xmm8,%xmm0
- pand %xmm8,%xmm15
- pxor %xmm0,%xmm2
- psllq $2,%xmm0
- pxor %xmm15,%xmm1
- psllq $2,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa %xmm2,%xmm9
- psrlq $4,%xmm2
- movdqa %xmm1,%xmm10
- psrlq $4,%xmm1
- pxor %xmm6,%xmm2
- pxor %xmm5,%xmm1
- pand %xmm7,%xmm2
- pand %xmm7,%xmm1
- pxor %xmm2,%xmm6
- psllq $4,%xmm2
- pxor %xmm1,%xmm5
- psllq $4,%xmm1
- pxor %xmm9,%xmm2
- pxor %xmm10,%xmm1
- movdqa %xmm0,%xmm9
- psrlq $4,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $4,%xmm15
- pxor %xmm4,%xmm0
- pxor %xmm3,%xmm15
- pand %xmm7,%xmm0
- pand %xmm7,%xmm15
- pxor %xmm0,%xmm4
- psllq $4,%xmm0
- pxor %xmm15,%xmm3
- psllq $4,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- decl %r10d
- jmp .Ldec_sbox
-.align 16
-.Ldec_loop:
- pxor 0(%rax),%xmm15
- pxor 16(%rax),%xmm0
- pxor 32(%rax),%xmm1
- pxor 48(%rax),%xmm2
-.byte 102,68,15,56,0,255
-.byte 102,15,56,0,199
- pxor 64(%rax),%xmm3
- pxor 80(%rax),%xmm4
-.byte 102,15,56,0,207
-.byte 102,15,56,0,215
- pxor 96(%rax),%xmm5
- pxor 112(%rax),%xmm6
-.byte 102,15,56,0,223
-.byte 102,15,56,0,231
-.byte 102,15,56,0,239
-.byte 102,15,56,0,247
- leaq 128(%rax),%rax
-.Ldec_sbox:
- pxor %xmm3,%xmm2
-
- pxor %xmm6,%xmm3
- pxor %xmm6,%xmm1
- pxor %xmm3,%xmm5
- pxor %xmm5,%xmm6
- pxor %xmm6,%xmm0
-
- pxor %xmm0,%xmm15
- pxor %xmm4,%xmm1
- pxor %xmm15,%xmm2
- pxor %xmm15,%xmm4
- pxor %xmm2,%xmm0
- movdqa %xmm2,%xmm10
- movdqa %xmm6,%xmm9
- movdqa %xmm0,%xmm8
- movdqa %xmm3,%xmm12
- movdqa %xmm4,%xmm11
-
- pxor %xmm15,%xmm10
- pxor %xmm3,%xmm9
- pxor %xmm5,%xmm8
- movdqa %xmm10,%xmm13
- pxor %xmm15,%xmm12
- movdqa %xmm9,%xmm7
- pxor %xmm1,%xmm11
- movdqa %xmm10,%xmm14
-
- por %xmm8,%xmm9
- por %xmm11,%xmm10
- pxor %xmm7,%xmm14
- pand %xmm11,%xmm13
- pxor %xmm8,%xmm11
- pand %xmm8,%xmm7
- pand %xmm11,%xmm14
- movdqa %xmm5,%xmm11
- pxor %xmm1,%xmm11
- pand %xmm11,%xmm12
- pxor %xmm12,%xmm10
- pxor %xmm12,%xmm9
- movdqa %xmm2,%xmm12
- movdqa %xmm0,%xmm11
- pxor %xmm6,%xmm12
- pxor %xmm4,%xmm11
- movdqa %xmm12,%xmm8
- pand %xmm11,%xmm12
- por %xmm11,%xmm8
- pxor %xmm12,%xmm7
- pxor %xmm14,%xmm10
- pxor %xmm13,%xmm9
- pxor %xmm14,%xmm8
- movdqa %xmm3,%xmm11
- pxor %xmm13,%xmm7
- movdqa %xmm15,%xmm12
- pxor %xmm13,%xmm8
- movdqa %xmm6,%xmm13
- pand %xmm5,%xmm11
- movdqa %xmm2,%xmm14
- pand %xmm1,%xmm12
- pand %xmm0,%xmm13
- por %xmm4,%xmm14
- pxor %xmm11,%xmm10
- pxor %xmm12,%xmm9
- pxor %xmm13,%xmm8
- pxor %xmm14,%xmm7
-
-
-
-
-
- movdqa %xmm10,%xmm11
- pand %xmm8,%xmm10
- pxor %xmm9,%xmm11
-
- movdqa %xmm7,%xmm13
- movdqa %xmm11,%xmm14
- pxor %xmm10,%xmm13
- pand %xmm13,%xmm14
-
- movdqa %xmm8,%xmm12
- pxor %xmm9,%xmm14
- pxor %xmm7,%xmm12
-
- pxor %xmm9,%xmm10
-
- pand %xmm10,%xmm12
-
- movdqa %xmm13,%xmm9
- pxor %xmm7,%xmm12
-
- pxor %xmm12,%xmm9
- pxor %xmm12,%xmm8
-
- pand %xmm7,%xmm9
-
- pxor %xmm9,%xmm13
- pxor %xmm9,%xmm8
-
- pand %xmm14,%xmm13
-
- pxor %xmm11,%xmm13
- movdqa %xmm4,%xmm11
- movdqa %xmm0,%xmm7
- movdqa %xmm14,%xmm9
- pxor %xmm13,%xmm9
- pand %xmm4,%xmm9
- pxor %xmm0,%xmm4
- pand %xmm14,%xmm0
- pand %xmm13,%xmm4
- pxor %xmm0,%xmm4
- pxor %xmm9,%xmm0
- pxor %xmm1,%xmm11
- pxor %xmm5,%xmm7
- pxor %xmm12,%xmm14
- pxor %xmm8,%xmm13
- movdqa %xmm14,%xmm10
- movdqa %xmm12,%xmm9
- pxor %xmm13,%xmm10
- pxor %xmm8,%xmm9
- pand %xmm11,%xmm10
- pand %xmm1,%xmm9
- pxor %xmm7,%xmm11
- pxor %xmm5,%xmm1
- pand %xmm14,%xmm7
- pand %xmm12,%xmm5
- pand %xmm13,%xmm11
- pand %xmm8,%xmm1
- pxor %xmm11,%xmm7
- pxor %xmm5,%xmm1
- pxor %xmm10,%xmm11
- pxor %xmm9,%xmm5
- pxor %xmm11,%xmm4
- pxor %xmm11,%xmm1
- pxor %xmm7,%xmm0
- pxor %xmm7,%xmm5
-
- movdqa %xmm2,%xmm11
- movdqa %xmm6,%xmm7
- pxor %xmm15,%xmm11
- pxor %xmm3,%xmm7
- movdqa %xmm14,%xmm10
- movdqa %xmm12,%xmm9
- pxor %xmm13,%xmm10
- pxor %xmm8,%xmm9
- pand %xmm11,%xmm10
- pand %xmm15,%xmm9
- pxor %xmm7,%xmm11
- pxor %xmm3,%xmm15
- pand %xmm14,%xmm7
- pand %xmm12,%xmm3
- pand %xmm13,%xmm11
- pand %xmm8,%xmm15
- pxor %xmm11,%xmm7
- pxor %xmm3,%xmm15
- pxor %xmm10,%xmm11
- pxor %xmm9,%xmm3
- pxor %xmm12,%xmm14
- pxor %xmm8,%xmm13
- movdqa %xmm14,%xmm10
- pxor %xmm13,%xmm10
- pand %xmm2,%xmm10
- pxor %xmm6,%xmm2
- pand %xmm14,%xmm6
- pand %xmm13,%xmm2
- pxor %xmm6,%xmm2
- pxor %xmm10,%xmm6
- pxor %xmm11,%xmm2
- pxor %xmm11,%xmm15
- pxor %xmm7,%xmm6
- pxor %xmm7,%xmm3
- pxor %xmm6,%xmm0
- pxor %xmm4,%xmm5
-
- pxor %xmm0,%xmm3
- pxor %xmm6,%xmm1
- pxor %xmm6,%xmm4
- pxor %xmm1,%xmm3
- pxor %xmm15,%xmm6
- pxor %xmm4,%xmm3
- pxor %xmm5,%xmm2
- pxor %xmm0,%xmm5
- pxor %xmm3,%xmm2
-
- pxor %xmm15,%xmm3
- pxor %xmm2,%xmm6
- decl %r10d
- jl .Ldec_done
-
- pshufd $0x4E,%xmm15,%xmm7
- pshufd $0x4E,%xmm2,%xmm13
- pxor %xmm15,%xmm7
- pshufd $0x4E,%xmm4,%xmm14
- pxor %xmm2,%xmm13
- pshufd $0x4E,%xmm0,%xmm8
- pxor %xmm4,%xmm14
- pshufd $0x4E,%xmm5,%xmm9
- pxor %xmm0,%xmm8
- pshufd $0x4E,%xmm3,%xmm10
- pxor %xmm5,%xmm9
- pxor %xmm13,%xmm15
- pxor %xmm13,%xmm0
- pshufd $0x4E,%xmm1,%xmm11
- pxor %xmm3,%xmm10
- pxor %xmm7,%xmm5
- pxor %xmm8,%xmm3
- pshufd $0x4E,%xmm6,%xmm12
- pxor %xmm1,%xmm11
- pxor %xmm14,%xmm0
- pxor %xmm9,%xmm1
- pxor %xmm6,%xmm12
-
- pxor %xmm14,%xmm5
- pxor %xmm13,%xmm3
- pxor %xmm13,%xmm1
- pxor %xmm10,%xmm6
- pxor %xmm11,%xmm2
- pxor %xmm14,%xmm1
- pxor %xmm14,%xmm6
- pxor %xmm12,%xmm4
- pshufd $0x93,%xmm15,%xmm7
- pshufd $0x93,%xmm0,%xmm8
- pxor %xmm7,%xmm15
- pshufd $0x93,%xmm5,%xmm9
- pxor %xmm8,%xmm0
- pshufd $0x93,%xmm3,%xmm10
- pxor %xmm9,%xmm5
- pshufd $0x93,%xmm1,%xmm11
- pxor %xmm10,%xmm3
- pshufd $0x93,%xmm6,%xmm12
- pxor %xmm11,%xmm1
- pshufd $0x93,%xmm2,%xmm13
- pxor %xmm12,%xmm6
- pshufd $0x93,%xmm4,%xmm14
- pxor %xmm13,%xmm2
- pxor %xmm14,%xmm4
-
- pxor %xmm15,%xmm8
- pxor %xmm4,%xmm7
- pxor %xmm4,%xmm8
- pshufd $0x4E,%xmm15,%xmm15
- pxor %xmm0,%xmm9
- pshufd $0x4E,%xmm0,%xmm0
- pxor %xmm1,%xmm12
- pxor %xmm7,%xmm15
- pxor %xmm6,%xmm13
- pxor %xmm8,%xmm0
- pxor %xmm3,%xmm11
- pshufd $0x4E,%xmm1,%xmm7
- pxor %xmm2,%xmm14
- pshufd $0x4E,%xmm6,%xmm8
- pxor %xmm5,%xmm10
- pshufd $0x4E,%xmm3,%xmm1
- pxor %xmm4,%xmm10
- pshufd $0x4E,%xmm4,%xmm6
- pxor %xmm4,%xmm11
- pshufd $0x4E,%xmm2,%xmm3
- pxor %xmm11,%xmm7
- pshufd $0x4E,%xmm5,%xmm2
- pxor %xmm12,%xmm8
- pxor %xmm1,%xmm10
- pxor %xmm14,%xmm6
- pxor %xmm3,%xmm13
- movdqa %xmm7,%xmm3
- pxor %xmm9,%xmm2
- movdqa %xmm13,%xmm5
- movdqa %xmm8,%xmm4
- movdqa %xmm2,%xmm1
- movdqa %xmm10,%xmm2
- movdqa -16(%r11),%xmm7
- jnz .Ldec_loop
- movdqa -32(%r11),%xmm7
- jmp .Ldec_loop
-.align 16
-.Ldec_done:
- movdqa 0(%r11),%xmm7
- movdqa 16(%r11),%xmm8
- movdqa %xmm2,%xmm9
- psrlq $1,%xmm2
- movdqa %xmm1,%xmm10
- psrlq $1,%xmm1
- pxor %xmm4,%xmm2
- pxor %xmm6,%xmm1
- pand %xmm7,%xmm2
- pand %xmm7,%xmm1
- pxor %xmm2,%xmm4
- psllq $1,%xmm2
- pxor %xmm1,%xmm6
- psllq $1,%xmm1
- pxor %xmm9,%xmm2
- pxor %xmm10,%xmm1
- movdqa %xmm5,%xmm9
- psrlq $1,%xmm5
- movdqa %xmm15,%xmm10
- psrlq $1,%xmm15
- pxor %xmm3,%xmm5
- pxor %xmm0,%xmm15
- pand %xmm7,%xmm5
- pand %xmm7,%xmm15
- pxor %xmm5,%xmm3
- psllq $1,%xmm5
- pxor %xmm15,%xmm0
- psllq $1,%xmm15
- pxor %xmm9,%xmm5
- pxor %xmm10,%xmm15
- movdqa 32(%r11),%xmm7
- movdqa %xmm6,%xmm9
- psrlq $2,%xmm6
- movdqa %xmm1,%xmm10
- psrlq $2,%xmm1
- pxor %xmm4,%xmm6
- pxor %xmm2,%xmm1
- pand %xmm8,%xmm6
- pand %xmm8,%xmm1
- pxor %xmm6,%xmm4
- psllq $2,%xmm6
- pxor %xmm1,%xmm2
- psllq $2,%xmm1
- pxor %xmm9,%xmm6
- pxor %xmm10,%xmm1
- movdqa %xmm0,%xmm9
- psrlq $2,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $2,%xmm15
- pxor %xmm3,%xmm0
- pxor %xmm5,%xmm15
- pand %xmm8,%xmm0
- pand %xmm8,%xmm15
- pxor %xmm0,%xmm3
- psllq $2,%xmm0
- pxor %xmm15,%xmm5
- psllq $2,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa %xmm3,%xmm9
- psrlq $4,%xmm3
- movdqa %xmm5,%xmm10
- psrlq $4,%xmm5
- pxor %xmm4,%xmm3
- pxor %xmm2,%xmm5
- pand %xmm7,%xmm3
- pand %xmm7,%xmm5
- pxor %xmm3,%xmm4
- psllq $4,%xmm3
- pxor %xmm5,%xmm2
- psllq $4,%xmm5
- pxor %xmm9,%xmm3
- pxor %xmm10,%xmm5
- movdqa %xmm0,%xmm9
- psrlq $4,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $4,%xmm15
- pxor %xmm6,%xmm0
- pxor %xmm1,%xmm15
- pand %xmm7,%xmm0
- pand %xmm7,%xmm15
- pxor %xmm0,%xmm6
- psllq $4,%xmm0
- pxor %xmm15,%xmm1
- psllq $4,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa (%rax),%xmm7
- pxor %xmm7,%xmm5
- pxor %xmm7,%xmm3
- pxor %xmm7,%xmm1
- pxor %xmm7,%xmm6
- pxor %xmm7,%xmm2
- pxor %xmm7,%xmm4
- pxor %xmm7,%xmm15
- pxor %xmm7,%xmm0
- .byte 0xf3,0xc3
-.cfi_endproc
-.size _bsaes_decrypt8,.-_bsaes_decrypt8
-.type _bsaes_key_convert,@function
-.align 16
-_bsaes_key_convert:
-.cfi_startproc
- leaq .Lmasks(%rip),%r11
- movdqu (%rcx),%xmm7
- leaq 16(%rcx),%rcx
- movdqa 0(%r11),%xmm0
- movdqa 16(%r11),%xmm1
- movdqa 32(%r11),%xmm2
- movdqa 48(%r11),%xmm3
- movdqa 64(%r11),%xmm4
- pcmpeqd %xmm5,%xmm5
-
- movdqu (%rcx),%xmm6
- movdqa %xmm7,(%rax)
- leaq 16(%rax),%rax
- decl %r10d
- jmp .Lkey_loop
-.align 16
-.Lkey_loop:
-.byte 102,15,56,0,244
-
- movdqa %xmm0,%xmm8
- movdqa %xmm1,%xmm9
-
- pand %xmm6,%xmm8
- pand %xmm6,%xmm9
- movdqa %xmm2,%xmm10
- pcmpeqb %xmm0,%xmm8
- psllq $4,%xmm0
- movdqa %xmm3,%xmm11
- pcmpeqb %xmm1,%xmm9
- psllq $4,%xmm1
-
- pand %xmm6,%xmm10
- pand %xmm6,%xmm11
- movdqa %xmm0,%xmm12
- pcmpeqb %xmm2,%xmm10
- psllq $4,%xmm2
- movdqa %xmm1,%xmm13
- pcmpeqb %xmm3,%xmm11
- psllq $4,%xmm3
-
- movdqa %xmm2,%xmm14
- movdqa %xmm3,%xmm15
- pxor %xmm5,%xmm8
- pxor %xmm5,%xmm9
-
- pand %xmm6,%xmm12
- pand %xmm6,%xmm13
- movdqa %xmm8,0(%rax)
- pcmpeqb %xmm0,%xmm12
- psrlq $4,%xmm0
- movdqa %xmm9,16(%rax)
- pcmpeqb %xmm1,%xmm13
- psrlq $4,%xmm1
- leaq 16(%rcx),%rcx
-
- pand %xmm6,%xmm14
- pand %xmm6,%xmm15
- movdqa %xmm10,32(%rax)
- pcmpeqb %xmm2,%xmm14
- psrlq $4,%xmm2
- movdqa %xmm11,48(%rax)
- pcmpeqb %xmm3,%xmm15
- psrlq $4,%xmm3
- movdqu (%rcx),%xmm6
-
- pxor %xmm5,%xmm13
- pxor %xmm5,%xmm14
- movdqa %xmm12,64(%rax)
- movdqa %xmm13,80(%rax)
- movdqa %xmm14,96(%rax)
- movdqa %xmm15,112(%rax)
- leaq 128(%rax),%rax
- decl %r10d
- jnz .Lkey_loop
-
- movdqa 80(%r11),%xmm7
-
- .byte 0xf3,0xc3
-.cfi_endproc
-.size _bsaes_key_convert,.-_bsaes_key_convert
-.globl bsaes_cbc_encrypt
-.hidden bsaes_cbc_encrypt
-.type bsaes_cbc_encrypt,@function
-.align 16
-bsaes_cbc_encrypt:
-.cfi_startproc
-
-
-
- movq %rsp,%rax
-.Lcbc_dec_prologue:
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-16
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
- leaq -72(%rsp),%rsp
-.cfi_adjust_cfa_offset 0x48
- movq %rsp,%rbp
-.cfi_def_cfa_register %rbp
- movl 240(%rcx),%eax
- movq %rdi,%r12
- movq %rsi,%r13
- movq %rdx,%r14
- movq %rcx,%r15
- movq %r8,%rbx
- shrq $4,%r14
-
- movl %eax,%edx
- shlq $7,%rax
- subq $96,%rax
- subq %rax,%rsp
-
- movq %rsp,%rax
- movq %r15,%rcx
- movl %edx,%r10d
- call _bsaes_key_convert
- pxor (%rsp),%xmm7
- movdqa %xmm6,(%rax)
- movdqa %xmm7,(%rsp)
-
- movdqu (%rbx),%xmm14
- subq $8,%r14
- jc .Lcbc_dec_loop_done
-
-.Lcbc_dec_loop:
- movdqu 0(%r12),%xmm15
- movdqu 16(%r12),%xmm0
- movdqu 32(%r12),%xmm1
- movdqu 48(%r12),%xmm2
- movdqu 64(%r12),%xmm3
- movdqu 80(%r12),%xmm4
- movq %rsp,%rax
- movdqu 96(%r12),%xmm5
- movl %edx,%r10d
- movdqu 112(%r12),%xmm6
- movdqa %xmm14,32(%rbp)
-
- call _bsaes_decrypt8
-
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm9
- pxor %xmm8,%xmm5
- movdqu 48(%r12),%xmm10
- pxor %xmm9,%xmm3
- movdqu 64(%r12),%xmm11
- pxor %xmm10,%xmm1
- movdqu 80(%r12),%xmm12
- pxor %xmm11,%xmm6
- movdqu 96(%r12),%xmm13
- pxor %xmm12,%xmm2
- movdqu 112(%r12),%xmm14
- pxor %xmm13,%xmm4
- movdqu %xmm15,0(%r13)
- leaq 128(%r12),%r12
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- movdqu %xmm3,48(%r13)
- movdqu %xmm1,64(%r13)
- movdqu %xmm6,80(%r13)
- movdqu %xmm2,96(%r13)
- movdqu %xmm4,112(%r13)
- leaq 128(%r13),%r13
- subq $8,%r14
- jnc .Lcbc_dec_loop
-
-.Lcbc_dec_loop_done:
- addq $8,%r14
- jz .Lcbc_dec_done
-
- movdqu 0(%r12),%xmm15
- movq %rsp,%rax
- movl %edx,%r10d
- cmpq $2,%r14
- jb .Lcbc_dec_one
- movdqu 16(%r12),%xmm0
- je .Lcbc_dec_two
- movdqu 32(%r12),%xmm1
- cmpq $4,%r14
- jb .Lcbc_dec_three
- movdqu 48(%r12),%xmm2
- je .Lcbc_dec_four
- movdqu 64(%r12),%xmm3
- cmpq $6,%r14
- jb .Lcbc_dec_five
- movdqu 80(%r12),%xmm4
- je .Lcbc_dec_six
- movdqu 96(%r12),%xmm5
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm9
- pxor %xmm8,%xmm5
- movdqu 48(%r12),%xmm10
- pxor %xmm9,%xmm3
- movdqu 64(%r12),%xmm11
- pxor %xmm10,%xmm1
- movdqu 80(%r12),%xmm12
- pxor %xmm11,%xmm6
- movdqu 96(%r12),%xmm14
- pxor %xmm12,%xmm2
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- movdqu %xmm3,48(%r13)
- movdqu %xmm1,64(%r13)
- movdqu %xmm6,80(%r13)
- movdqu %xmm2,96(%r13)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_six:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm9
- pxor %xmm8,%xmm5
- movdqu 48(%r12),%xmm10
- pxor %xmm9,%xmm3
- movdqu 64(%r12),%xmm11
- pxor %xmm10,%xmm1
- movdqu 80(%r12),%xmm14
- pxor %xmm11,%xmm6
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- movdqu %xmm3,48(%r13)
- movdqu %xmm1,64(%r13)
- movdqu %xmm6,80(%r13)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_five:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm9
- pxor %xmm8,%xmm5
- movdqu 48(%r12),%xmm10
- pxor %xmm9,%xmm3
- movdqu 64(%r12),%xmm14
- pxor %xmm10,%xmm1
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- movdqu %xmm3,48(%r13)
- movdqu %xmm1,64(%r13)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_four:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm9
- pxor %xmm8,%xmm5
- movdqu 48(%r12),%xmm14
- pxor %xmm9,%xmm3
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- movdqu %xmm3,48(%r13)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_three:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm14
- pxor %xmm8,%xmm5
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_two:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm14
- pxor %xmm7,%xmm0
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_one:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm14
- movdqu %xmm15,0(%r13)
- jmp .Lcbc_dec_done
-
-.Lcbc_dec_done:
- movdqu %xmm14,(%rbx)
- leaq (%rsp),%rax
- pxor %xmm0,%xmm0
-.Lcbc_dec_bzero:
- movdqa %xmm0,0(%rax)
- movdqa %xmm0,16(%rax)
- leaq 32(%rax),%rax
- cmpq %rax,%rbp
- ja .Lcbc_dec_bzero
-
- leaq 120(%rbp),%rax
-.cfi_def_cfa %rax,8
- movq -48(%rax),%r15
-.cfi_restore %r15
- movq -40(%rax),%r14
-.cfi_restore %r14
- movq -32(%rax),%r13
-.cfi_restore %r13
- movq -24(%rax),%r12
-.cfi_restore %r12
- movq -16(%rax),%rbx
-.cfi_restore %rbx
- movq -8(%rax),%rbp
-.cfi_restore %rbp
- leaq (%rax),%rsp
-.cfi_def_cfa_register %rsp
-.Lcbc_dec_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
-
-.globl bsaes_ctr32_encrypt_blocks
-.hidden bsaes_ctr32_encrypt_blocks
-.type bsaes_ctr32_encrypt_blocks,@function
-.align 16
-bsaes_ctr32_encrypt_blocks:
-.cfi_startproc
-#ifndef NDEBUG
-#ifndef BORINGSSL_FIPS
-.extern BORINGSSL_function_hit
-.hidden BORINGSSL_function_hit
- movb $1,BORINGSSL_function_hit+6(%rip)
-#endif
-#endif
- movq %rsp,%rax
-.Lctr_enc_prologue:
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-16
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
- leaq -72(%rsp),%rsp
-.cfi_adjust_cfa_offset 0x48
- movq %rsp,%rbp
-.cfi_def_cfa_register %rbp
- movdqu (%r8),%xmm0
- movl 240(%rcx),%eax
- movq %rdi,%r12
- movq %rsi,%r13
- movq %rdx,%r14
- movq %rcx,%r15
- movdqa %xmm0,32(%rbp)
-
-
-
- movl %eax,%ebx
- shlq $7,%rax
- subq $96,%rax
- subq %rax,%rsp
-
- movq %rsp,%rax
- movq %r15,%rcx
- movl %ebx,%r10d
- call _bsaes_key_convert
- pxor %xmm6,%xmm7
- movdqa %xmm7,(%rax)
-
- movdqa (%rsp),%xmm8
- leaq .LADD1(%rip),%r11
- movdqa 32(%rbp),%xmm15
- movdqa -32(%r11),%xmm7
-.byte 102,68,15,56,0,199
-.byte 102,68,15,56,0,255
- movdqa %xmm8,(%rsp)
- jmp .Lctr_enc_loop
-.align 16
-.Lctr_enc_loop:
- movdqa %xmm15,32(%rbp)
- movdqa %xmm15,%xmm0
- movdqa %xmm15,%xmm1
- paddd 0(%r11),%xmm0
- movdqa %xmm15,%xmm2
- paddd 16(%r11),%xmm1
- movdqa %xmm15,%xmm3
- paddd 32(%r11),%xmm2
- movdqa %xmm15,%xmm4
- paddd 48(%r11),%xmm3
- movdqa %xmm15,%xmm5
- paddd 64(%r11),%xmm4
- movdqa %xmm15,%xmm6
- paddd 80(%r11),%xmm5
- paddd 96(%r11),%xmm6
-
-
-
- movdqa (%rsp),%xmm8
- leaq 16(%rsp),%rax
- movdqa -16(%r11),%xmm7
- pxor %xmm8,%xmm15
- pxor %xmm8,%xmm0
- pxor %xmm8,%xmm1
- pxor %xmm8,%xmm2
-.byte 102,68,15,56,0,255
-.byte 102,15,56,0,199
- pxor %xmm8,%xmm3
- pxor %xmm8,%xmm4
-.byte 102,15,56,0,207
-.byte 102,15,56,0,215
- pxor %xmm8,%xmm5
- pxor %xmm8,%xmm6
-.byte 102,15,56,0,223
-.byte 102,15,56,0,231
-.byte 102,15,56,0,239
-.byte 102,15,56,0,247
- leaq .LBS0(%rip),%r11
- movl %ebx,%r10d
-
- call _bsaes_encrypt8_bitslice
-
- subq $8,%r14
- jc .Lctr_enc_loop_done
-
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- movdqu 32(%r12),%xmm9
- movdqu 48(%r12),%xmm10
- movdqu 64(%r12),%xmm11
- movdqu 80(%r12),%xmm12
- movdqu 96(%r12),%xmm13
- movdqu 112(%r12),%xmm14
- leaq 128(%r12),%r12
- pxor %xmm15,%xmm7
- movdqa 32(%rbp),%xmm15
- pxor %xmm8,%xmm0
- movdqu %xmm7,0(%r13)
- pxor %xmm9,%xmm3
- movdqu %xmm0,16(%r13)
- pxor %xmm10,%xmm5
- movdqu %xmm3,32(%r13)
- pxor %xmm11,%xmm2
- movdqu %xmm5,48(%r13)
- pxor %xmm12,%xmm6
- movdqu %xmm2,64(%r13)
- pxor %xmm13,%xmm1
- movdqu %xmm6,80(%r13)
- pxor %xmm14,%xmm4
- movdqu %xmm1,96(%r13)
- leaq .LADD1(%rip),%r11
- movdqu %xmm4,112(%r13)
- leaq 128(%r13),%r13
- paddd 112(%r11),%xmm15
- jnz .Lctr_enc_loop
-
- jmp .Lctr_enc_done
-.align 16
-.Lctr_enc_loop_done:
- addq $8,%r14
- movdqu 0(%r12),%xmm7
- pxor %xmm7,%xmm15
- movdqu %xmm15,0(%r13)
- cmpq $2,%r14
- jb .Lctr_enc_done
- movdqu 16(%r12),%xmm8
- pxor %xmm8,%xmm0
- movdqu %xmm0,16(%r13)
- je .Lctr_enc_done
- movdqu 32(%r12),%xmm9
- pxor %xmm9,%xmm3
- movdqu %xmm3,32(%r13)
- cmpq $4,%r14
- jb .Lctr_enc_done
- movdqu 48(%r12),%xmm10
- pxor %xmm10,%xmm5
- movdqu %xmm5,48(%r13)
- je .Lctr_enc_done
- movdqu 64(%r12),%xmm11
- pxor %xmm11,%xmm2
- movdqu %xmm2,64(%r13)
- cmpq $6,%r14
- jb .Lctr_enc_done
- movdqu 80(%r12),%xmm12
- pxor %xmm12,%xmm6
- movdqu %xmm6,80(%r13)
- je .Lctr_enc_done
- movdqu 96(%r12),%xmm13
- pxor %xmm13,%xmm1
- movdqu %xmm1,96(%r13)
-
-
-
-.Lctr_enc_done:
- leaq (%rsp),%rax
- pxor %xmm0,%xmm0
-.Lctr_enc_bzero:
- movdqa %xmm0,0(%rax)
- movdqa %xmm0,16(%rax)
- leaq 32(%rax),%rax
- cmpq %rax,%rbp
- ja .Lctr_enc_bzero
-
- leaq 120(%rbp),%rax
-.cfi_def_cfa %rax,8
- movq -48(%rax),%r15
-.cfi_restore %r15
- movq -40(%rax),%r14
-.cfi_restore %r14
- movq -32(%rax),%r13
-.cfi_restore %r13
- movq -24(%rax),%r12
-.cfi_restore %r12
- movq -16(%rax),%rbx
-.cfi_restore %rbx
- movq -8(%rax),%rbp
-.cfi_restore %rbp
- leaq (%rax),%rsp
-.cfi_def_cfa_register %rsp
-.Lctr_enc_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
-.type _bsaes_const,@object
-.align 64
-_bsaes_const:
-.LM0ISR:
-.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
-.LISRM0:
-.quad 0x01040b0e0205080f, 0x0306090c00070a0d
-.LISR:
-.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
-.LBS0:
-.quad 0x5555555555555555, 0x5555555555555555
-.LBS1:
-.quad 0x3333333333333333, 0x3333333333333333
-.LBS2:
-.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
-.LSR:
-.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
-.LSRM0:
-.quad 0x0304090e00050a0f, 0x01060b0c0207080d
-.LM0SR:
-.quad 0x0a0e02060f03070b, 0x0004080c05090d01
-.LSWPUP:
-.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
-.LSWPUPM0SR:
-.quad 0x0a0d02060c03070b, 0x0004080f05090e01
-.LADD1:
-.quad 0x0000000000000000, 0x0000000100000000
-.LADD2:
-.quad 0x0000000000000000, 0x0000000200000000
-.LADD3:
-.quad 0x0000000000000000, 0x0000000300000000
-.LADD4:
-.quad 0x0000000000000000, 0x0000000400000000
-.LADD5:
-.quad 0x0000000000000000, 0x0000000500000000
-.LADD6:
-.quad 0x0000000000000000, 0x0000000600000000
-.LADD7:
-.quad 0x0000000000000000, 0x0000000700000000
-.LADD8:
-.quad 0x0000000000000000, 0x0000000800000000
-.Lxts_magic:
-.long 0x87,0,1,0
-.Lmasks:
-.quad 0x0101010101010101, 0x0101010101010101
-.quad 0x0202020202020202, 0x0202020202020202
-.quad 0x0404040404040404, 0x0404040404040404
-.quad 0x0808080808080808, 0x0808080808080808
-.LM0:
-.quad 0x02060a0e03070b0f, 0x0004080c0105090d
-.L63:
-.quad 0x6363636363636363, 0x6363636363636363
-.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0
-.align 64
-.size _bsaes_const,.-_bsaes_const
-#endif
diff --git a/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S b/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
index 4355438e..0fc93f9a 100644
--- a/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
+++ b/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
@@ -120,6 +120,181 @@ _vpaes_encrypt_core:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.type _vpaes_encrypt_core_2x,@function
+.align 16
+_vpaes_encrypt_core_2x:
+.cfi_startproc
+ movq %rdx,%r9
+ movq $16,%r11
+ movl 240(%rdx),%eax
+ movdqa %xmm9,%xmm1
+ movdqa %xmm9,%xmm7
+ movdqa .Lk_ipt(%rip),%xmm2
+ movdqa %xmm2,%xmm8
+ pandn %xmm0,%xmm1
+ pandn %xmm6,%xmm7
+ movdqu (%r9),%xmm5
+
+ psrld $4,%xmm1
+ psrld $4,%xmm7
+ pand %xmm9,%xmm0
+ pand %xmm9,%xmm6
+.byte 102,15,56,0,208
+.byte 102,68,15,56,0,198
+ movdqa .Lk_ipt+16(%rip),%xmm0
+ movdqa %xmm0,%xmm6
+.byte 102,15,56,0,193
+.byte 102,15,56,0,247
+ pxor %xmm5,%xmm2
+ pxor %xmm5,%xmm8
+ addq $16,%r9
+ pxor %xmm2,%xmm0
+ pxor %xmm8,%xmm6
+ leaq .Lk_mc_backward(%rip),%r10
+ jmp .Lenc2x_entry
+
+.align 16
+.Lenc2x_loop:
+
+ movdqa .Lk_sb1(%rip),%xmm4
+ movdqa .Lk_sb1+16(%rip),%xmm0
+ movdqa %xmm4,%xmm12
+ movdqa %xmm0,%xmm6
+.byte 102,15,56,0,226
+.byte 102,69,15,56,0,224
+.byte 102,15,56,0,195
+.byte 102,65,15,56,0,243
+ pxor %xmm5,%xmm4
+ pxor %xmm5,%xmm12
+ movdqa .Lk_sb2(%rip),%xmm5
+ movdqa %xmm5,%xmm13
+ pxor %xmm4,%xmm0
+ pxor %xmm12,%xmm6
+ movdqa -64(%r11,%r10,1),%xmm1
+
+.byte 102,15,56,0,234
+.byte 102,69,15,56,0,232
+ movdqa (%r11,%r10,1),%xmm4
+
+ movdqa .Lk_sb2+16(%rip),%xmm2
+ movdqa %xmm2,%xmm8
+.byte 102,15,56,0,211
+.byte 102,69,15,56,0,195
+ movdqa %xmm0,%xmm3
+ movdqa %xmm6,%xmm11
+ pxor %xmm5,%xmm2
+ pxor %xmm13,%xmm8
+.byte 102,15,56,0,193
+.byte 102,15,56,0,241
+ addq $16,%r9
+ pxor %xmm2,%xmm0
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,220
+.byte 102,68,15,56,0,220
+ addq $16,%r11
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm11
+.byte 102,15,56,0,193
+.byte 102,15,56,0,241
+ andq $0x30,%r11
+ subq $1,%rax
+ pxor %xmm3,%xmm0
+ pxor %xmm11,%xmm6
+
+.Lenc2x_entry:
+
+ movdqa %xmm9,%xmm1
+ movdqa %xmm9,%xmm7
+ movdqa .Lk_inv+16(%rip),%xmm5
+ movdqa %xmm5,%xmm13
+ pandn %xmm0,%xmm1
+ pandn %xmm6,%xmm7
+ psrld $4,%xmm1
+ psrld $4,%xmm7
+ pand %xmm9,%xmm0
+ pand %xmm9,%xmm6
+.byte 102,15,56,0,232
+.byte 102,68,15,56,0,238
+ movdqa %xmm10,%xmm3
+ movdqa %xmm10,%xmm11
+ pxor %xmm1,%xmm0
+ pxor %xmm7,%xmm6
+.byte 102,15,56,0,217
+.byte 102,68,15,56,0,223
+ movdqa %xmm10,%xmm4
+ movdqa %xmm10,%xmm12
+ pxor %xmm5,%xmm3
+ pxor %xmm13,%xmm11
+.byte 102,15,56,0,224
+.byte 102,68,15,56,0,230
+ movdqa %xmm10,%xmm2
+ movdqa %xmm10,%xmm8
+ pxor %xmm5,%xmm4
+ pxor %xmm13,%xmm12
+.byte 102,15,56,0,211
+.byte 102,69,15,56,0,195
+ movdqa %xmm10,%xmm3
+ movdqa %xmm10,%xmm11
+ pxor %xmm0,%xmm2
+ pxor %xmm6,%xmm8
+.byte 102,15,56,0,220
+.byte 102,69,15,56,0,220
+ movdqu (%r9),%xmm5
+
+ pxor %xmm1,%xmm3
+ pxor %xmm7,%xmm11
+ jnz .Lenc2x_loop
+
+
+ movdqa -96(%r10),%xmm4
+ movdqa -80(%r10),%xmm0
+ movdqa %xmm4,%xmm12
+ movdqa %xmm0,%xmm6
+.byte 102,15,56,0,226
+.byte 102,69,15,56,0,224
+ pxor %xmm5,%xmm4
+ pxor %xmm5,%xmm12
+.byte 102,15,56,0,195
+.byte 102,65,15,56,0,243
+ movdqa 64(%r11,%r10,1),%xmm1
+
+ pxor %xmm4,%xmm0
+ pxor %xmm12,%xmm6
+.byte 102,15,56,0,193
+.byte 102,15,56,0,241
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size _vpaes_encrypt_core_2x,.-_vpaes_encrypt_core_2x
+
+
+
+
+
+
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
@@ -759,6 +934,69 @@ vpaes_cbc_encrypt:
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
+.globl vpaes_ctr32_encrypt_blocks
+.hidden vpaes_ctr32_encrypt_blocks
+.type vpaes_ctr32_encrypt_blocks,@function
+.align 16
+vpaes_ctr32_encrypt_blocks:
+.cfi_startproc
+
+ xchgq %rcx,%rdx
+ testq %rcx,%rcx
+ jz .Lctr32_abort
+ movdqu (%r8),%xmm0
+ movdqa .Lctr_add_one(%rip),%xmm8
+ subq %rdi,%rsi
+ call _vpaes_preheat
+ movdqa %xmm0,%xmm6
+ pshufb .Lrev_ctr(%rip),%xmm6
+
+ testq $1,%rcx
+ jz .Lctr32_prep_loop
+
+
+
+ movdqu (%rdi),%xmm7
+ call _vpaes_encrypt_core
+ pxor %xmm7,%xmm0
+ paddd %xmm8,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ subq $1,%rcx
+ leaq 16(%rdi),%rdi
+ jz .Lctr32_done
+
+.Lctr32_prep_loop:
+
+
+ movdqa %xmm6,%xmm14
+ movdqa %xmm6,%xmm15
+ paddd %xmm8,%xmm15
+
+.Lctr32_loop:
+ movdqa .Lrev_ctr(%rip),%xmm1
+ movdqa %xmm14,%xmm0
+ movdqa %xmm15,%xmm6
+.byte 102,15,56,0,193
+.byte 102,15,56,0,241
+ call _vpaes_encrypt_core_2x
+ movdqu (%rdi),%xmm1
+ movdqu 16(%rdi),%xmm2
+ movdqa .Lctr_add_two(%rip),%xmm3
+ pxor %xmm1,%xmm0
+ pxor %xmm2,%xmm6
+ paddd %xmm3,%xmm14
+ paddd %xmm3,%xmm15
+ movdqu %xmm0,(%rsi,%rdi,1)
+ movdqu %xmm6,16(%rsi,%rdi,1)
+ subq $2,%rcx
+ leaq 32(%rdi),%rdi
+ jnz .Lctr32_loop
+
+.Lctr32_done:
+.Lctr32_abort:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
@@ -881,6 +1119,17 @@ _vpaes_consts:
.Lk_dsbo:
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+
+
+.Lrev_ctr:
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+
+
+.Lctr_add_one:
+.quad 0x0000000000000000, 0x0000000100000000
+.Lctr_add_two:
+.quad 0x0000000000000000, 0x0000000200000000
+
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align 64
.size _vpaes_consts,.-_vpaes_consts
diff --git a/mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S b/mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S
deleted file mode 100644
index c2807e38..00000000
--- a/mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S
+++ /dev/null
@@ -1,1609 +0,0 @@
-# This file is generated from a similarly-named Perl script in the BoringSSL
-# source tree. Do not edit by hand.
-
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-#endif
-
-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-.text
-
-
-.p2align 6
-_bsaes_encrypt8:
-
- leaq L$BS0(%rip),%r11
-
- movdqa (%rax),%xmm8
- leaq 16(%rax),%rax
- movdqa 80(%r11),%xmm7
- pxor %xmm8,%xmm15
- pxor %xmm8,%xmm0
- pxor %xmm8,%xmm1
- pxor %xmm8,%xmm2
-.byte 102,68,15,56,0,255
-.byte 102,15,56,0,199
- pxor %xmm8,%xmm3
- pxor %xmm8,%xmm4
-.byte 102,15,56,0,207
-.byte 102,15,56,0,215
- pxor %xmm8,%xmm5
- pxor %xmm8,%xmm6
-.byte 102,15,56,0,223
-.byte 102,15,56,0,231
-.byte 102,15,56,0,239
-.byte 102,15,56,0,247
-_bsaes_encrypt8_bitslice:
- movdqa 0(%r11),%xmm7
- movdqa 16(%r11),%xmm8
- movdqa %xmm5,%xmm9
- psrlq $1,%xmm5
- movdqa %xmm3,%xmm10
- psrlq $1,%xmm3
- pxor %xmm6,%xmm5
- pxor %xmm4,%xmm3
- pand %xmm7,%xmm5
- pand %xmm7,%xmm3
- pxor %xmm5,%xmm6
- psllq $1,%xmm5
- pxor %xmm3,%xmm4
- psllq $1,%xmm3
- pxor %xmm9,%xmm5
- pxor %xmm10,%xmm3
- movdqa %xmm1,%xmm9
- psrlq $1,%xmm1
- movdqa %xmm15,%xmm10
- psrlq $1,%xmm15
- pxor %xmm2,%xmm1
- pxor %xmm0,%xmm15
- pand %xmm7,%xmm1
- pand %xmm7,%xmm15
- pxor %xmm1,%xmm2
- psllq $1,%xmm1
- pxor %xmm15,%xmm0
- psllq $1,%xmm15
- pxor %xmm9,%xmm1
- pxor %xmm10,%xmm15
- movdqa 32(%r11),%xmm7
- movdqa %xmm4,%xmm9
- psrlq $2,%xmm4
- movdqa %xmm3,%xmm10
- psrlq $2,%xmm3
- pxor %xmm6,%xmm4
- pxor %xmm5,%xmm3
- pand %xmm8,%xmm4
- pand %xmm8,%xmm3
- pxor %xmm4,%xmm6
- psllq $2,%xmm4
- pxor %xmm3,%xmm5
- psllq $2,%xmm3
- pxor %xmm9,%xmm4
- pxor %xmm10,%xmm3
- movdqa %xmm0,%xmm9
- psrlq $2,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $2,%xmm15
- pxor %xmm2,%xmm0
- pxor %xmm1,%xmm15
- pand %xmm8,%xmm0
- pand %xmm8,%xmm15
- pxor %xmm0,%xmm2
- psllq $2,%xmm0
- pxor %xmm15,%xmm1
- psllq $2,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa %xmm2,%xmm9
- psrlq $4,%xmm2
- movdqa %xmm1,%xmm10
- psrlq $4,%xmm1
- pxor %xmm6,%xmm2
- pxor %xmm5,%xmm1
- pand %xmm7,%xmm2
- pand %xmm7,%xmm1
- pxor %xmm2,%xmm6
- psllq $4,%xmm2
- pxor %xmm1,%xmm5
- psllq $4,%xmm1
- pxor %xmm9,%xmm2
- pxor %xmm10,%xmm1
- movdqa %xmm0,%xmm9
- psrlq $4,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $4,%xmm15
- pxor %xmm4,%xmm0
- pxor %xmm3,%xmm15
- pand %xmm7,%xmm0
- pand %xmm7,%xmm15
- pxor %xmm0,%xmm4
- psllq $4,%xmm0
- pxor %xmm15,%xmm3
- psllq $4,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- decl %r10d
- jmp L$enc_sbox
-.p2align 4
-L$enc_loop:
- pxor 0(%rax),%xmm15
- pxor 16(%rax),%xmm0
- pxor 32(%rax),%xmm1
- pxor 48(%rax),%xmm2
-.byte 102,68,15,56,0,255
-.byte 102,15,56,0,199
- pxor 64(%rax),%xmm3
- pxor 80(%rax),%xmm4
-.byte 102,15,56,0,207
-.byte 102,15,56,0,215
- pxor 96(%rax),%xmm5
- pxor 112(%rax),%xmm6
-.byte 102,15,56,0,223
-.byte 102,15,56,0,231
-.byte 102,15,56,0,239
-.byte 102,15,56,0,247
- leaq 128(%rax),%rax
-L$enc_sbox:
- pxor %xmm5,%xmm4
- pxor %xmm0,%xmm1
- pxor %xmm15,%xmm2
- pxor %xmm1,%xmm5
- pxor %xmm15,%xmm4
-
- pxor %xmm2,%xmm5
- pxor %xmm6,%xmm2
- pxor %xmm4,%xmm6
- pxor %xmm3,%xmm2
- pxor %xmm4,%xmm3
- pxor %xmm0,%xmm2
-
- pxor %xmm6,%xmm1
- pxor %xmm4,%xmm0
- movdqa %xmm6,%xmm10
- movdqa %xmm0,%xmm9
- movdqa %xmm4,%xmm8
- movdqa %xmm1,%xmm12
- movdqa %xmm5,%xmm11
-
- pxor %xmm3,%xmm10
- pxor %xmm1,%xmm9
- pxor %xmm2,%xmm8
- movdqa %xmm10,%xmm13
- pxor %xmm3,%xmm12
- movdqa %xmm9,%xmm7
- pxor %xmm15,%xmm11
- movdqa %xmm10,%xmm14
-
- por %xmm8,%xmm9
- por %xmm11,%xmm10
- pxor %xmm7,%xmm14
- pand %xmm11,%xmm13
- pxor %xmm8,%xmm11
- pand %xmm8,%xmm7
- pand %xmm11,%xmm14
- movdqa %xmm2,%xmm11
- pxor %xmm15,%xmm11
- pand %xmm11,%xmm12
- pxor %xmm12,%xmm10
- pxor %xmm12,%xmm9
- movdqa %xmm6,%xmm12
- movdqa %xmm4,%xmm11
- pxor %xmm0,%xmm12
- pxor %xmm5,%xmm11
- movdqa %xmm12,%xmm8
- pand %xmm11,%xmm12
- por %xmm11,%xmm8
- pxor %xmm12,%xmm7
- pxor %xmm14,%xmm10
- pxor %xmm13,%xmm9
- pxor %xmm14,%xmm8
- movdqa %xmm1,%xmm11
- pxor %xmm13,%xmm7
- movdqa %xmm3,%xmm12
- pxor %xmm13,%xmm8
- movdqa %xmm0,%xmm13
- pand %xmm2,%xmm11
- movdqa %xmm6,%xmm14
- pand %xmm15,%xmm12
- pand %xmm4,%xmm13
- por %xmm5,%xmm14
- pxor %xmm11,%xmm10
- pxor %xmm12,%xmm9
- pxor %xmm13,%xmm8
- pxor %xmm14,%xmm7
-
-
-
-
-
- movdqa %xmm10,%xmm11
- pand %xmm8,%xmm10
- pxor %xmm9,%xmm11
-
- movdqa %xmm7,%xmm13
- movdqa %xmm11,%xmm14
- pxor %xmm10,%xmm13
- pand %xmm13,%xmm14
-
- movdqa %xmm8,%xmm12
- pxor %xmm9,%xmm14
- pxor %xmm7,%xmm12
-
- pxor %xmm9,%xmm10
-
- pand %xmm10,%xmm12
-
- movdqa %xmm13,%xmm9
- pxor %xmm7,%xmm12
-
- pxor %xmm12,%xmm9
- pxor %xmm12,%xmm8
-
- pand %xmm7,%xmm9
-
- pxor %xmm9,%xmm13
- pxor %xmm9,%xmm8
-
- pand %xmm14,%xmm13
-
- pxor %xmm11,%xmm13
- movdqa %xmm5,%xmm11
- movdqa %xmm4,%xmm7
- movdqa %xmm14,%xmm9
- pxor %xmm13,%xmm9
- pand %xmm5,%xmm9
- pxor %xmm4,%xmm5
- pand %xmm14,%xmm4
- pand %xmm13,%xmm5
- pxor %xmm4,%xmm5
- pxor %xmm9,%xmm4
- pxor %xmm15,%xmm11
- pxor %xmm2,%xmm7
- pxor %xmm12,%xmm14
- pxor %xmm8,%xmm13
- movdqa %xmm14,%xmm10
- movdqa %xmm12,%xmm9
- pxor %xmm13,%xmm10
- pxor %xmm8,%xmm9
- pand %xmm11,%xmm10
- pand %xmm15,%xmm9
- pxor %xmm7,%xmm11
- pxor %xmm2,%xmm15
- pand %xmm14,%xmm7
- pand %xmm12,%xmm2
- pand %xmm13,%xmm11
- pand %xmm8,%xmm15
- pxor %xmm11,%xmm7
- pxor %xmm2,%xmm15
- pxor %xmm10,%xmm11
- pxor %xmm9,%xmm2
- pxor %xmm11,%xmm5
- pxor %xmm11,%xmm15
- pxor %xmm7,%xmm4
- pxor %xmm7,%xmm2
-
- movdqa %xmm6,%xmm11
- movdqa %xmm0,%xmm7
- pxor %xmm3,%xmm11
- pxor %xmm1,%xmm7
- movdqa %xmm14,%xmm10
- movdqa %xmm12,%xmm9
- pxor %xmm13,%xmm10
- pxor %xmm8,%xmm9
- pand %xmm11,%xmm10
- pand %xmm3,%xmm9
- pxor %xmm7,%xmm11
- pxor %xmm1,%xmm3
- pand %xmm14,%xmm7
- pand %xmm12,%xmm1
- pand %xmm13,%xmm11
- pand %xmm8,%xmm3
- pxor %xmm11,%xmm7
- pxor %xmm1,%xmm3
- pxor %xmm10,%xmm11
- pxor %xmm9,%xmm1
- pxor %xmm12,%xmm14
- pxor %xmm8,%xmm13
- movdqa %xmm14,%xmm10
- pxor %xmm13,%xmm10
- pand %xmm6,%xmm10
- pxor %xmm0,%xmm6
- pand %xmm14,%xmm0
- pand %xmm13,%xmm6
- pxor %xmm0,%xmm6
- pxor %xmm10,%xmm0
- pxor %xmm11,%xmm6
- pxor %xmm11,%xmm3
- pxor %xmm7,%xmm0
- pxor %xmm7,%xmm1
- pxor %xmm15,%xmm6
- pxor %xmm5,%xmm0
- pxor %xmm6,%xmm3
- pxor %xmm15,%xmm5
- pxor %xmm0,%xmm15
-
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- pxor %xmm2,%xmm1
- pxor %xmm4,%xmm2
- pxor %xmm4,%xmm3
-
- pxor %xmm2,%xmm5
- decl %r10d
- jl L$enc_done
- pshufd $0x93,%xmm15,%xmm7
- pshufd $0x93,%xmm0,%xmm8
- pxor %xmm7,%xmm15
- pshufd $0x93,%xmm3,%xmm9
- pxor %xmm8,%xmm0
- pshufd $0x93,%xmm5,%xmm10
- pxor %xmm9,%xmm3
- pshufd $0x93,%xmm2,%xmm11
- pxor %xmm10,%xmm5
- pshufd $0x93,%xmm6,%xmm12
- pxor %xmm11,%xmm2
- pshufd $0x93,%xmm1,%xmm13
- pxor %xmm12,%xmm6
- pshufd $0x93,%xmm4,%xmm14
- pxor %xmm13,%xmm1
- pxor %xmm14,%xmm4
-
- pxor %xmm15,%xmm8
- pxor %xmm4,%xmm7
- pxor %xmm4,%xmm8
- pshufd $0x4E,%xmm15,%xmm15
- pxor %xmm0,%xmm9
- pshufd $0x4E,%xmm0,%xmm0
- pxor %xmm2,%xmm12
- pxor %xmm7,%xmm15
- pxor %xmm6,%xmm13
- pxor %xmm8,%xmm0
- pxor %xmm5,%xmm11
- pshufd $0x4E,%xmm2,%xmm7
- pxor %xmm1,%xmm14
- pshufd $0x4E,%xmm6,%xmm8
- pxor %xmm3,%xmm10
- pshufd $0x4E,%xmm5,%xmm2
- pxor %xmm4,%xmm10
- pshufd $0x4E,%xmm4,%xmm6
- pxor %xmm4,%xmm11
- pshufd $0x4E,%xmm1,%xmm5
- pxor %xmm11,%xmm7
- pshufd $0x4E,%xmm3,%xmm1
- pxor %xmm12,%xmm8
- pxor %xmm10,%xmm2
- pxor %xmm14,%xmm6
- pxor %xmm13,%xmm5
- movdqa %xmm7,%xmm3
- pxor %xmm9,%xmm1
- movdqa %xmm8,%xmm4
- movdqa 48(%r11),%xmm7
- jnz L$enc_loop
- movdqa 64(%r11),%xmm7
- jmp L$enc_loop
-.p2align 4
-L$enc_done:
- movdqa 0(%r11),%xmm7
- movdqa 16(%r11),%xmm8
- movdqa %xmm1,%xmm9
- psrlq $1,%xmm1
- movdqa %xmm2,%xmm10
- psrlq $1,%xmm2
- pxor %xmm4,%xmm1
- pxor %xmm6,%xmm2
- pand %xmm7,%xmm1
- pand %xmm7,%xmm2
- pxor %xmm1,%xmm4
- psllq $1,%xmm1
- pxor %xmm2,%xmm6
- psllq $1,%xmm2
- pxor %xmm9,%xmm1
- pxor %xmm10,%xmm2
- movdqa %xmm3,%xmm9
- psrlq $1,%xmm3
- movdqa %xmm15,%xmm10
- psrlq $1,%xmm15
- pxor %xmm5,%xmm3
- pxor %xmm0,%xmm15
- pand %xmm7,%xmm3
- pand %xmm7,%xmm15
- pxor %xmm3,%xmm5
- psllq $1,%xmm3
- pxor %xmm15,%xmm0
- psllq $1,%xmm15
- pxor %xmm9,%xmm3
- pxor %xmm10,%xmm15
- movdqa 32(%r11),%xmm7
- movdqa %xmm6,%xmm9
- psrlq $2,%xmm6
- movdqa %xmm2,%xmm10
- psrlq $2,%xmm2
- pxor %xmm4,%xmm6
- pxor %xmm1,%xmm2
- pand %xmm8,%xmm6
- pand %xmm8,%xmm2
- pxor %xmm6,%xmm4
- psllq $2,%xmm6
- pxor %xmm2,%xmm1
- psllq $2,%xmm2
- pxor %xmm9,%xmm6
- pxor %xmm10,%xmm2
- movdqa %xmm0,%xmm9
- psrlq $2,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $2,%xmm15
- pxor %xmm5,%xmm0
- pxor %xmm3,%xmm15
- pand %xmm8,%xmm0
- pand %xmm8,%xmm15
- pxor %xmm0,%xmm5
- psllq $2,%xmm0
- pxor %xmm15,%xmm3
- psllq $2,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa %xmm5,%xmm9
- psrlq $4,%xmm5
- movdqa %xmm3,%xmm10
- psrlq $4,%xmm3
- pxor %xmm4,%xmm5
- pxor %xmm1,%xmm3
- pand %xmm7,%xmm5
- pand %xmm7,%xmm3
- pxor %xmm5,%xmm4
- psllq $4,%xmm5
- pxor %xmm3,%xmm1
- psllq $4,%xmm3
- pxor %xmm9,%xmm5
- pxor %xmm10,%xmm3
- movdqa %xmm0,%xmm9
- psrlq $4,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $4,%xmm15
- pxor %xmm6,%xmm0
- pxor %xmm2,%xmm15
- pand %xmm7,%xmm0
- pand %xmm7,%xmm15
- pxor %xmm0,%xmm6
- psllq $4,%xmm0
- pxor %xmm15,%xmm2
- psllq $4,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa (%rax),%xmm7
- pxor %xmm7,%xmm3
- pxor %xmm7,%xmm5
- pxor %xmm7,%xmm2
- pxor %xmm7,%xmm6
- pxor %xmm7,%xmm1
- pxor %xmm7,%xmm4
- pxor %xmm7,%xmm15
- pxor %xmm7,%xmm0
- .byte 0xf3,0xc3
-
-
-
-
-.p2align 6
-_bsaes_decrypt8:
-
- leaq L$BS0(%rip),%r11
-
- movdqa (%rax),%xmm8
- leaq 16(%rax),%rax
- movdqa -48(%r11),%xmm7
- pxor %xmm8,%xmm15
- pxor %xmm8,%xmm0
- pxor %xmm8,%xmm1
- pxor %xmm8,%xmm2
-.byte 102,68,15,56,0,255
-.byte 102,15,56,0,199
- pxor %xmm8,%xmm3
- pxor %xmm8,%xmm4
-.byte 102,15,56,0,207
-.byte 102,15,56,0,215
- pxor %xmm8,%xmm5
- pxor %xmm8,%xmm6
-.byte 102,15,56,0,223
-.byte 102,15,56,0,231
-.byte 102,15,56,0,239
-.byte 102,15,56,0,247
- movdqa 0(%r11),%xmm7
- movdqa 16(%r11),%xmm8
- movdqa %xmm5,%xmm9
- psrlq $1,%xmm5
- movdqa %xmm3,%xmm10
- psrlq $1,%xmm3
- pxor %xmm6,%xmm5
- pxor %xmm4,%xmm3
- pand %xmm7,%xmm5
- pand %xmm7,%xmm3
- pxor %xmm5,%xmm6
- psllq $1,%xmm5
- pxor %xmm3,%xmm4
- psllq $1,%xmm3
- pxor %xmm9,%xmm5
- pxor %xmm10,%xmm3
- movdqa %xmm1,%xmm9
- psrlq $1,%xmm1
- movdqa %xmm15,%xmm10
- psrlq $1,%xmm15
- pxor %xmm2,%xmm1
- pxor %xmm0,%xmm15
- pand %xmm7,%xmm1
- pand %xmm7,%xmm15
- pxor %xmm1,%xmm2
- psllq $1,%xmm1
- pxor %xmm15,%xmm0
- psllq $1,%xmm15
- pxor %xmm9,%xmm1
- pxor %xmm10,%xmm15
- movdqa 32(%r11),%xmm7
- movdqa %xmm4,%xmm9
- psrlq $2,%xmm4
- movdqa %xmm3,%xmm10
- psrlq $2,%xmm3
- pxor %xmm6,%xmm4
- pxor %xmm5,%xmm3
- pand %xmm8,%xmm4
- pand %xmm8,%xmm3
- pxor %xmm4,%xmm6
- psllq $2,%xmm4
- pxor %xmm3,%xmm5
- psllq $2,%xmm3
- pxor %xmm9,%xmm4
- pxor %xmm10,%xmm3
- movdqa %xmm0,%xmm9
- psrlq $2,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $2,%xmm15
- pxor %xmm2,%xmm0
- pxor %xmm1,%xmm15
- pand %xmm8,%xmm0
- pand %xmm8,%xmm15
- pxor %xmm0,%xmm2
- psllq $2,%xmm0
- pxor %xmm15,%xmm1
- psllq $2,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa %xmm2,%xmm9
- psrlq $4,%xmm2
- movdqa %xmm1,%xmm10
- psrlq $4,%xmm1
- pxor %xmm6,%xmm2
- pxor %xmm5,%xmm1
- pand %xmm7,%xmm2
- pand %xmm7,%xmm1
- pxor %xmm2,%xmm6
- psllq $4,%xmm2
- pxor %xmm1,%xmm5
- psllq $4,%xmm1
- pxor %xmm9,%xmm2
- pxor %xmm10,%xmm1
- movdqa %xmm0,%xmm9
- psrlq $4,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $4,%xmm15
- pxor %xmm4,%xmm0
- pxor %xmm3,%xmm15
- pand %xmm7,%xmm0
- pand %xmm7,%xmm15
- pxor %xmm0,%xmm4
- psllq $4,%xmm0
- pxor %xmm15,%xmm3
- psllq $4,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- decl %r10d
- jmp L$dec_sbox
-.p2align 4
-L$dec_loop:
- pxor 0(%rax),%xmm15
- pxor 16(%rax),%xmm0
- pxor 32(%rax),%xmm1
- pxor 48(%rax),%xmm2
-.byte 102,68,15,56,0,255
-.byte 102,15,56,0,199
- pxor 64(%rax),%xmm3
- pxor 80(%rax),%xmm4
-.byte 102,15,56,0,207
-.byte 102,15,56,0,215
- pxor 96(%rax),%xmm5
- pxor 112(%rax),%xmm6
-.byte 102,15,56,0,223
-.byte 102,15,56,0,231
-.byte 102,15,56,0,239
-.byte 102,15,56,0,247
- leaq 128(%rax),%rax
-L$dec_sbox:
- pxor %xmm3,%xmm2
-
- pxor %xmm6,%xmm3
- pxor %xmm6,%xmm1
- pxor %xmm3,%xmm5
- pxor %xmm5,%xmm6
- pxor %xmm6,%xmm0
-
- pxor %xmm0,%xmm15
- pxor %xmm4,%xmm1
- pxor %xmm15,%xmm2
- pxor %xmm15,%xmm4
- pxor %xmm2,%xmm0
- movdqa %xmm2,%xmm10
- movdqa %xmm6,%xmm9
- movdqa %xmm0,%xmm8
- movdqa %xmm3,%xmm12
- movdqa %xmm4,%xmm11
-
- pxor %xmm15,%xmm10
- pxor %xmm3,%xmm9
- pxor %xmm5,%xmm8
- movdqa %xmm10,%xmm13
- pxor %xmm15,%xmm12
- movdqa %xmm9,%xmm7
- pxor %xmm1,%xmm11
- movdqa %xmm10,%xmm14
-
- por %xmm8,%xmm9
- por %xmm11,%xmm10
- pxor %xmm7,%xmm14
- pand %xmm11,%xmm13
- pxor %xmm8,%xmm11
- pand %xmm8,%xmm7
- pand %xmm11,%xmm14
- movdqa %xmm5,%xmm11
- pxor %xmm1,%xmm11
- pand %xmm11,%xmm12
- pxor %xmm12,%xmm10
- pxor %xmm12,%xmm9
- movdqa %xmm2,%xmm12
- movdqa %xmm0,%xmm11
- pxor %xmm6,%xmm12
- pxor %xmm4,%xmm11
- movdqa %xmm12,%xmm8
- pand %xmm11,%xmm12
- por %xmm11,%xmm8
- pxor %xmm12,%xmm7
- pxor %xmm14,%xmm10
- pxor %xmm13,%xmm9
- pxor %xmm14,%xmm8
- movdqa %xmm3,%xmm11
- pxor %xmm13,%xmm7
- movdqa %xmm15,%xmm12
- pxor %xmm13,%xmm8
- movdqa %xmm6,%xmm13
- pand %xmm5,%xmm11
- movdqa %xmm2,%xmm14
- pand %xmm1,%xmm12
- pand %xmm0,%xmm13
- por %xmm4,%xmm14
- pxor %xmm11,%xmm10
- pxor %xmm12,%xmm9
- pxor %xmm13,%xmm8
- pxor %xmm14,%xmm7
-
-
-
-
-
- movdqa %xmm10,%xmm11
- pand %xmm8,%xmm10
- pxor %xmm9,%xmm11
-
- movdqa %xmm7,%xmm13
- movdqa %xmm11,%xmm14
- pxor %xmm10,%xmm13
- pand %xmm13,%xmm14
-
- movdqa %xmm8,%xmm12
- pxor %xmm9,%xmm14
- pxor %xmm7,%xmm12
-
- pxor %xmm9,%xmm10
-
- pand %xmm10,%xmm12
-
- movdqa %xmm13,%xmm9
- pxor %xmm7,%xmm12
-
- pxor %xmm12,%xmm9
- pxor %xmm12,%xmm8
-
- pand %xmm7,%xmm9
-
- pxor %xmm9,%xmm13
- pxor %xmm9,%xmm8
-
- pand %xmm14,%xmm13
-
- pxor %xmm11,%xmm13
- movdqa %xmm4,%xmm11
- movdqa %xmm0,%xmm7
- movdqa %xmm14,%xmm9
- pxor %xmm13,%xmm9
- pand %xmm4,%xmm9
- pxor %xmm0,%xmm4
- pand %xmm14,%xmm0
- pand %xmm13,%xmm4
- pxor %xmm0,%xmm4
- pxor %xmm9,%xmm0
- pxor %xmm1,%xmm11
- pxor %xmm5,%xmm7
- pxor %xmm12,%xmm14
- pxor %xmm8,%xmm13
- movdqa %xmm14,%xmm10
- movdqa %xmm12,%xmm9
- pxor %xmm13,%xmm10
- pxor %xmm8,%xmm9
- pand %xmm11,%xmm10
- pand %xmm1,%xmm9
- pxor %xmm7,%xmm11
- pxor %xmm5,%xmm1
- pand %xmm14,%xmm7
- pand %xmm12,%xmm5
- pand %xmm13,%xmm11
- pand %xmm8,%xmm1
- pxor %xmm11,%xmm7
- pxor %xmm5,%xmm1
- pxor %xmm10,%xmm11
- pxor %xmm9,%xmm5
- pxor %xmm11,%xmm4
- pxor %xmm11,%xmm1
- pxor %xmm7,%xmm0
- pxor %xmm7,%xmm5
-
- movdqa %xmm2,%xmm11
- movdqa %xmm6,%xmm7
- pxor %xmm15,%xmm11
- pxor %xmm3,%xmm7
- movdqa %xmm14,%xmm10
- movdqa %xmm12,%xmm9
- pxor %xmm13,%xmm10
- pxor %xmm8,%xmm9
- pand %xmm11,%xmm10
- pand %xmm15,%xmm9
- pxor %xmm7,%xmm11
- pxor %xmm3,%xmm15
- pand %xmm14,%xmm7
- pand %xmm12,%xmm3
- pand %xmm13,%xmm11
- pand %xmm8,%xmm15
- pxor %xmm11,%xmm7
- pxor %xmm3,%xmm15
- pxor %xmm10,%xmm11
- pxor %xmm9,%xmm3
- pxor %xmm12,%xmm14
- pxor %xmm8,%xmm13
- movdqa %xmm14,%xmm10
- pxor %xmm13,%xmm10
- pand %xmm2,%xmm10
- pxor %xmm6,%xmm2
- pand %xmm14,%xmm6
- pand %xmm13,%xmm2
- pxor %xmm6,%xmm2
- pxor %xmm10,%xmm6
- pxor %xmm11,%xmm2
- pxor %xmm11,%xmm15
- pxor %xmm7,%xmm6
- pxor %xmm7,%xmm3
- pxor %xmm6,%xmm0
- pxor %xmm4,%xmm5
-
- pxor %xmm0,%xmm3
- pxor %xmm6,%xmm1
- pxor %xmm6,%xmm4
- pxor %xmm1,%xmm3
- pxor %xmm15,%xmm6
- pxor %xmm4,%xmm3
- pxor %xmm5,%xmm2
- pxor %xmm0,%xmm5
- pxor %xmm3,%xmm2
-
- pxor %xmm15,%xmm3
- pxor %xmm2,%xmm6
- decl %r10d
- jl L$dec_done
-
- pshufd $0x4E,%xmm15,%xmm7
- pshufd $0x4E,%xmm2,%xmm13
- pxor %xmm15,%xmm7
- pshufd $0x4E,%xmm4,%xmm14
- pxor %xmm2,%xmm13
- pshufd $0x4E,%xmm0,%xmm8
- pxor %xmm4,%xmm14
- pshufd $0x4E,%xmm5,%xmm9
- pxor %xmm0,%xmm8
- pshufd $0x4E,%xmm3,%xmm10
- pxor %xmm5,%xmm9
- pxor %xmm13,%xmm15
- pxor %xmm13,%xmm0
- pshufd $0x4E,%xmm1,%xmm11
- pxor %xmm3,%xmm10
- pxor %xmm7,%xmm5
- pxor %xmm8,%xmm3
- pshufd $0x4E,%xmm6,%xmm12
- pxor %xmm1,%xmm11
- pxor %xmm14,%xmm0
- pxor %xmm9,%xmm1
- pxor %xmm6,%xmm12
-
- pxor %xmm14,%xmm5
- pxor %xmm13,%xmm3
- pxor %xmm13,%xmm1
- pxor %xmm10,%xmm6
- pxor %xmm11,%xmm2
- pxor %xmm14,%xmm1
- pxor %xmm14,%xmm6
- pxor %xmm12,%xmm4
- pshufd $0x93,%xmm15,%xmm7
- pshufd $0x93,%xmm0,%xmm8
- pxor %xmm7,%xmm15
- pshufd $0x93,%xmm5,%xmm9
- pxor %xmm8,%xmm0
- pshufd $0x93,%xmm3,%xmm10
- pxor %xmm9,%xmm5
- pshufd $0x93,%xmm1,%xmm11
- pxor %xmm10,%xmm3
- pshufd $0x93,%xmm6,%xmm12
- pxor %xmm11,%xmm1
- pshufd $0x93,%xmm2,%xmm13
- pxor %xmm12,%xmm6
- pshufd $0x93,%xmm4,%xmm14
- pxor %xmm13,%xmm2
- pxor %xmm14,%xmm4
-
- pxor %xmm15,%xmm8
- pxor %xmm4,%xmm7
- pxor %xmm4,%xmm8
- pshufd $0x4E,%xmm15,%xmm15
- pxor %xmm0,%xmm9
- pshufd $0x4E,%xmm0,%xmm0
- pxor %xmm1,%xmm12
- pxor %xmm7,%xmm15
- pxor %xmm6,%xmm13
- pxor %xmm8,%xmm0
- pxor %xmm3,%xmm11
- pshufd $0x4E,%xmm1,%xmm7
- pxor %xmm2,%xmm14
- pshufd $0x4E,%xmm6,%xmm8
- pxor %xmm5,%xmm10
- pshufd $0x4E,%xmm3,%xmm1
- pxor %xmm4,%xmm10
- pshufd $0x4E,%xmm4,%xmm6
- pxor %xmm4,%xmm11
- pshufd $0x4E,%xmm2,%xmm3
- pxor %xmm11,%xmm7
- pshufd $0x4E,%xmm5,%xmm2
- pxor %xmm12,%xmm8
- pxor %xmm1,%xmm10
- pxor %xmm14,%xmm6
- pxor %xmm3,%xmm13
- movdqa %xmm7,%xmm3
- pxor %xmm9,%xmm2
- movdqa %xmm13,%xmm5
- movdqa %xmm8,%xmm4
- movdqa %xmm2,%xmm1
- movdqa %xmm10,%xmm2
- movdqa -16(%r11),%xmm7
- jnz L$dec_loop
- movdqa -32(%r11),%xmm7
- jmp L$dec_loop
-.p2align 4
-L$dec_done:
- movdqa 0(%r11),%xmm7
- movdqa 16(%r11),%xmm8
- movdqa %xmm2,%xmm9
- psrlq $1,%xmm2
- movdqa %xmm1,%xmm10
- psrlq $1,%xmm1
- pxor %xmm4,%xmm2
- pxor %xmm6,%xmm1
- pand %xmm7,%xmm2
- pand %xmm7,%xmm1
- pxor %xmm2,%xmm4
- psllq $1,%xmm2
- pxor %xmm1,%xmm6
- psllq $1,%xmm1
- pxor %xmm9,%xmm2
- pxor %xmm10,%xmm1
- movdqa %xmm5,%xmm9
- psrlq $1,%xmm5
- movdqa %xmm15,%xmm10
- psrlq $1,%xmm15
- pxor %xmm3,%xmm5
- pxor %xmm0,%xmm15
- pand %xmm7,%xmm5
- pand %xmm7,%xmm15
- pxor %xmm5,%xmm3
- psllq $1,%xmm5
- pxor %xmm15,%xmm0
- psllq $1,%xmm15
- pxor %xmm9,%xmm5
- pxor %xmm10,%xmm15
- movdqa 32(%r11),%xmm7
- movdqa %xmm6,%xmm9
- psrlq $2,%xmm6
- movdqa %xmm1,%xmm10
- psrlq $2,%xmm1
- pxor %xmm4,%xmm6
- pxor %xmm2,%xmm1
- pand %xmm8,%xmm6
- pand %xmm8,%xmm1
- pxor %xmm6,%xmm4
- psllq $2,%xmm6
- pxor %xmm1,%xmm2
- psllq $2,%xmm1
- pxor %xmm9,%xmm6
- pxor %xmm10,%xmm1
- movdqa %xmm0,%xmm9
- psrlq $2,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $2,%xmm15
- pxor %xmm3,%xmm0
- pxor %xmm5,%xmm15
- pand %xmm8,%xmm0
- pand %xmm8,%xmm15
- pxor %xmm0,%xmm3
- psllq $2,%xmm0
- pxor %xmm15,%xmm5
- psllq $2,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa %xmm3,%xmm9
- psrlq $4,%xmm3
- movdqa %xmm5,%xmm10
- psrlq $4,%xmm5
- pxor %xmm4,%xmm3
- pxor %xmm2,%xmm5
- pand %xmm7,%xmm3
- pand %xmm7,%xmm5
- pxor %xmm3,%xmm4
- psllq $4,%xmm3
- pxor %xmm5,%xmm2
- psllq $4,%xmm5
- pxor %xmm9,%xmm3
- pxor %xmm10,%xmm5
- movdqa %xmm0,%xmm9
- psrlq $4,%xmm0
- movdqa %xmm15,%xmm10
- psrlq $4,%xmm15
- pxor %xmm6,%xmm0
- pxor %xmm1,%xmm15
- pand %xmm7,%xmm0
- pand %xmm7,%xmm15
- pxor %xmm0,%xmm6
- psllq $4,%xmm0
- pxor %xmm15,%xmm1
- psllq $4,%xmm15
- pxor %xmm9,%xmm0
- pxor %xmm10,%xmm15
- movdqa (%rax),%xmm7
- pxor %xmm7,%xmm5
- pxor %xmm7,%xmm3
- pxor %xmm7,%xmm1
- pxor %xmm7,%xmm6
- pxor %xmm7,%xmm2
- pxor %xmm7,%xmm4
- pxor %xmm7,%xmm15
- pxor %xmm7,%xmm0
- .byte 0xf3,0xc3
-
-
-
-.p2align 4
-_bsaes_key_convert:
-
- leaq L$masks(%rip),%r11
- movdqu (%rcx),%xmm7
- leaq 16(%rcx),%rcx
- movdqa 0(%r11),%xmm0
- movdqa 16(%r11),%xmm1
- movdqa 32(%r11),%xmm2
- movdqa 48(%r11),%xmm3
- movdqa 64(%r11),%xmm4
- pcmpeqd %xmm5,%xmm5
-
- movdqu (%rcx),%xmm6
- movdqa %xmm7,(%rax)
- leaq 16(%rax),%rax
- decl %r10d
- jmp L$key_loop
-.p2align 4
-L$key_loop:
-.byte 102,15,56,0,244
-
- movdqa %xmm0,%xmm8
- movdqa %xmm1,%xmm9
-
- pand %xmm6,%xmm8
- pand %xmm6,%xmm9
- movdqa %xmm2,%xmm10
- pcmpeqb %xmm0,%xmm8
- psllq $4,%xmm0
- movdqa %xmm3,%xmm11
- pcmpeqb %xmm1,%xmm9
- psllq $4,%xmm1
-
- pand %xmm6,%xmm10
- pand %xmm6,%xmm11
- movdqa %xmm0,%xmm12
- pcmpeqb %xmm2,%xmm10
- psllq $4,%xmm2
- movdqa %xmm1,%xmm13
- pcmpeqb %xmm3,%xmm11
- psllq $4,%xmm3
-
- movdqa %xmm2,%xmm14
- movdqa %xmm3,%xmm15
- pxor %xmm5,%xmm8
- pxor %xmm5,%xmm9
-
- pand %xmm6,%xmm12
- pand %xmm6,%xmm13
- movdqa %xmm8,0(%rax)
- pcmpeqb %xmm0,%xmm12
- psrlq $4,%xmm0
- movdqa %xmm9,16(%rax)
- pcmpeqb %xmm1,%xmm13
- psrlq $4,%xmm1
- leaq 16(%rcx),%rcx
-
- pand %xmm6,%xmm14
- pand %xmm6,%xmm15
- movdqa %xmm10,32(%rax)
- pcmpeqb %xmm2,%xmm14
- psrlq $4,%xmm2
- movdqa %xmm11,48(%rax)
- pcmpeqb %xmm3,%xmm15
- psrlq $4,%xmm3
- movdqu (%rcx),%xmm6
-
- pxor %xmm5,%xmm13
- pxor %xmm5,%xmm14
- movdqa %xmm12,64(%rax)
- movdqa %xmm13,80(%rax)
- movdqa %xmm14,96(%rax)
- movdqa %xmm15,112(%rax)
- leaq 128(%rax),%rax
- decl %r10d
- jnz L$key_loop
-
- movdqa 80(%r11),%xmm7
-
- .byte 0xf3,0xc3
-
-
-.globl _bsaes_cbc_encrypt
-.private_extern _bsaes_cbc_encrypt
-
-.p2align 4
-_bsaes_cbc_encrypt:
-
-
-
-
- movq %rsp,%rax
-L$cbc_dec_prologue:
- pushq %rbp
-
- pushq %rbx
-
- pushq %r12
-
- pushq %r13
-
- pushq %r14
-
- pushq %r15
-
- leaq -72(%rsp),%rsp
-
- movq %rsp,%rbp
-
- movl 240(%rcx),%eax
- movq %rdi,%r12
- movq %rsi,%r13
- movq %rdx,%r14
- movq %rcx,%r15
- movq %r8,%rbx
- shrq $4,%r14
-
- movl %eax,%edx
- shlq $7,%rax
- subq $96,%rax
- subq %rax,%rsp
-
- movq %rsp,%rax
- movq %r15,%rcx
- movl %edx,%r10d
- call _bsaes_key_convert
- pxor (%rsp),%xmm7
- movdqa %xmm6,(%rax)
- movdqa %xmm7,(%rsp)
-
- movdqu (%rbx),%xmm14
- subq $8,%r14
- jc L$cbc_dec_loop_done
-
-L$cbc_dec_loop:
- movdqu 0(%r12),%xmm15
- movdqu 16(%r12),%xmm0
- movdqu 32(%r12),%xmm1
- movdqu 48(%r12),%xmm2
- movdqu 64(%r12),%xmm3
- movdqu 80(%r12),%xmm4
- movq %rsp,%rax
- movdqu 96(%r12),%xmm5
- movl %edx,%r10d
- movdqu 112(%r12),%xmm6
- movdqa %xmm14,32(%rbp)
-
- call _bsaes_decrypt8
-
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm9
- pxor %xmm8,%xmm5
- movdqu 48(%r12),%xmm10
- pxor %xmm9,%xmm3
- movdqu 64(%r12),%xmm11
- pxor %xmm10,%xmm1
- movdqu 80(%r12),%xmm12
- pxor %xmm11,%xmm6
- movdqu 96(%r12),%xmm13
- pxor %xmm12,%xmm2
- movdqu 112(%r12),%xmm14
- pxor %xmm13,%xmm4
- movdqu %xmm15,0(%r13)
- leaq 128(%r12),%r12
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- movdqu %xmm3,48(%r13)
- movdqu %xmm1,64(%r13)
- movdqu %xmm6,80(%r13)
- movdqu %xmm2,96(%r13)
- movdqu %xmm4,112(%r13)
- leaq 128(%r13),%r13
- subq $8,%r14
- jnc L$cbc_dec_loop
-
-L$cbc_dec_loop_done:
- addq $8,%r14
- jz L$cbc_dec_done
-
- movdqu 0(%r12),%xmm15
- movq %rsp,%rax
- movl %edx,%r10d
- cmpq $2,%r14
- jb L$cbc_dec_one
- movdqu 16(%r12),%xmm0
- je L$cbc_dec_two
- movdqu 32(%r12),%xmm1
- cmpq $4,%r14
- jb L$cbc_dec_three
- movdqu 48(%r12),%xmm2
- je L$cbc_dec_four
- movdqu 64(%r12),%xmm3
- cmpq $6,%r14
- jb L$cbc_dec_five
- movdqu 80(%r12),%xmm4
- je L$cbc_dec_six
- movdqu 96(%r12),%xmm5
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm9
- pxor %xmm8,%xmm5
- movdqu 48(%r12),%xmm10
- pxor %xmm9,%xmm3
- movdqu 64(%r12),%xmm11
- pxor %xmm10,%xmm1
- movdqu 80(%r12),%xmm12
- pxor %xmm11,%xmm6
- movdqu 96(%r12),%xmm14
- pxor %xmm12,%xmm2
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- movdqu %xmm3,48(%r13)
- movdqu %xmm1,64(%r13)
- movdqu %xmm6,80(%r13)
- movdqu %xmm2,96(%r13)
- jmp L$cbc_dec_done
-.p2align 4
-L$cbc_dec_six:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm9
- pxor %xmm8,%xmm5
- movdqu 48(%r12),%xmm10
- pxor %xmm9,%xmm3
- movdqu 64(%r12),%xmm11
- pxor %xmm10,%xmm1
- movdqu 80(%r12),%xmm14
- pxor %xmm11,%xmm6
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- movdqu %xmm3,48(%r13)
- movdqu %xmm1,64(%r13)
- movdqu %xmm6,80(%r13)
- jmp L$cbc_dec_done
-.p2align 4
-L$cbc_dec_five:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm9
- pxor %xmm8,%xmm5
- movdqu 48(%r12),%xmm10
- pxor %xmm9,%xmm3
- movdqu 64(%r12),%xmm14
- pxor %xmm10,%xmm1
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- movdqu %xmm3,48(%r13)
- movdqu %xmm1,64(%r13)
- jmp L$cbc_dec_done
-.p2align 4
-L$cbc_dec_four:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm9
- pxor %xmm8,%xmm5
- movdqu 48(%r12),%xmm14
- pxor %xmm9,%xmm3
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- movdqu %xmm3,48(%r13)
- jmp L$cbc_dec_done
-.p2align 4
-L$cbc_dec_three:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- pxor %xmm7,%xmm0
- movdqu 32(%r12),%xmm14
- pxor %xmm8,%xmm5
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- movdqu %xmm5,32(%r13)
- jmp L$cbc_dec_done
-.p2align 4
-L$cbc_dec_two:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm14
- pxor %xmm7,%xmm0
- movdqu %xmm15,0(%r13)
- movdqu %xmm0,16(%r13)
- jmp L$cbc_dec_done
-.p2align 4
-L$cbc_dec_one:
- movdqa %xmm14,32(%rbp)
- call _bsaes_decrypt8
- pxor 32(%rbp),%xmm15
- movdqu 0(%r12),%xmm14
- movdqu %xmm15,0(%r13)
- jmp L$cbc_dec_done
-
-L$cbc_dec_done:
- movdqu %xmm14,(%rbx)
- leaq (%rsp),%rax
- pxor %xmm0,%xmm0
-L$cbc_dec_bzero:
- movdqa %xmm0,0(%rax)
- movdqa %xmm0,16(%rax)
- leaq 32(%rax),%rax
- cmpq %rax,%rbp
- ja L$cbc_dec_bzero
-
- leaq 120(%rbp),%rax
-
- movq -48(%rax),%r15
-
- movq -40(%rax),%r14
-
- movq -32(%rax),%r13
-
- movq -24(%rax),%r12
-
- movq -16(%rax),%rbx
-
- movq -8(%rax),%rbp
-
- leaq (%rax),%rsp
-
-L$cbc_dec_epilogue:
- .byte 0xf3,0xc3
-
-
-
-.globl _bsaes_ctr32_encrypt_blocks
-.private_extern _bsaes_ctr32_encrypt_blocks
-
-.p2align 4
-_bsaes_ctr32_encrypt_blocks:
-
-#ifndef NDEBUG
-#ifndef BORINGSSL_FIPS
-
- movb $1,_BORINGSSL_function_hit+6(%rip)
-#endif
-#endif
- movq %rsp,%rax
-L$ctr_enc_prologue:
- pushq %rbp
-
- pushq %rbx
-
- pushq %r12
-
- pushq %r13
-
- pushq %r14
-
- pushq %r15
-
- leaq -72(%rsp),%rsp
-
- movq %rsp,%rbp
-
- movdqu (%r8),%xmm0
- movl 240(%rcx),%eax
- movq %rdi,%r12
- movq %rsi,%r13
- movq %rdx,%r14
- movq %rcx,%r15
- movdqa %xmm0,32(%rbp)
-
-
-
- movl %eax,%ebx
- shlq $7,%rax
- subq $96,%rax
- subq %rax,%rsp
-
- movq %rsp,%rax
- movq %r15,%rcx
- movl %ebx,%r10d
- call _bsaes_key_convert
- pxor %xmm6,%xmm7
- movdqa %xmm7,(%rax)
-
- movdqa (%rsp),%xmm8
- leaq L$ADD1(%rip),%r11
- movdqa 32(%rbp),%xmm15
- movdqa -32(%r11),%xmm7
-.byte 102,68,15,56,0,199
-.byte 102,68,15,56,0,255
- movdqa %xmm8,(%rsp)
- jmp L$ctr_enc_loop
-.p2align 4
-L$ctr_enc_loop:
- movdqa %xmm15,32(%rbp)
- movdqa %xmm15,%xmm0
- movdqa %xmm15,%xmm1
- paddd 0(%r11),%xmm0
- movdqa %xmm15,%xmm2
- paddd 16(%r11),%xmm1
- movdqa %xmm15,%xmm3
- paddd 32(%r11),%xmm2
- movdqa %xmm15,%xmm4
- paddd 48(%r11),%xmm3
- movdqa %xmm15,%xmm5
- paddd 64(%r11),%xmm4
- movdqa %xmm15,%xmm6
- paddd 80(%r11),%xmm5
- paddd 96(%r11),%xmm6
-
-
-
- movdqa (%rsp),%xmm8
- leaq 16(%rsp),%rax
- movdqa -16(%r11),%xmm7
- pxor %xmm8,%xmm15
- pxor %xmm8,%xmm0
- pxor %xmm8,%xmm1
- pxor %xmm8,%xmm2
-.byte 102,68,15,56,0,255
-.byte 102,15,56,0,199
- pxor %xmm8,%xmm3
- pxor %xmm8,%xmm4
-.byte 102,15,56,0,207
-.byte 102,15,56,0,215
- pxor %xmm8,%xmm5
- pxor %xmm8,%xmm6
-.byte 102,15,56,0,223
-.byte 102,15,56,0,231
-.byte 102,15,56,0,239
-.byte 102,15,56,0,247
- leaq L$BS0(%rip),%r11
- movl %ebx,%r10d
-
- call _bsaes_encrypt8_bitslice
-
- subq $8,%r14
- jc L$ctr_enc_loop_done
-
- movdqu 0(%r12),%xmm7
- movdqu 16(%r12),%xmm8
- movdqu 32(%r12),%xmm9
- movdqu 48(%r12),%xmm10
- movdqu 64(%r12),%xmm11
- movdqu 80(%r12),%xmm12
- movdqu 96(%r12),%xmm13
- movdqu 112(%r12),%xmm14
- leaq 128(%r12),%r12
- pxor %xmm15,%xmm7
- movdqa 32(%rbp),%xmm15
- pxor %xmm8,%xmm0
- movdqu %xmm7,0(%r13)
- pxor %xmm9,%xmm3
- movdqu %xmm0,16(%r13)
- pxor %xmm10,%xmm5
- movdqu %xmm3,32(%r13)
- pxor %xmm11,%xmm2
- movdqu %xmm5,48(%r13)
- pxor %xmm12,%xmm6
- movdqu %xmm2,64(%r13)
- pxor %xmm13,%xmm1
- movdqu %xmm6,80(%r13)
- pxor %xmm14,%xmm4
- movdqu %xmm1,96(%r13)
- leaq L$ADD1(%rip),%r11
- movdqu %xmm4,112(%r13)
- leaq 128(%r13),%r13
- paddd 112(%r11),%xmm15
- jnz L$ctr_enc_loop
-
- jmp L$ctr_enc_done
-.p2align 4
-L$ctr_enc_loop_done:
- addq $8,%r14
- movdqu 0(%r12),%xmm7
- pxor %xmm7,%xmm15
- movdqu %xmm15,0(%r13)
- cmpq $2,%r14
- jb L$ctr_enc_done
- movdqu 16(%r12),%xmm8
- pxor %xmm8,%xmm0
- movdqu %xmm0,16(%r13)
- je L$ctr_enc_done
- movdqu 32(%r12),%xmm9
- pxor %xmm9,%xmm3
- movdqu %xmm3,32(%r13)
- cmpq $4,%r14
- jb L$ctr_enc_done
- movdqu 48(%r12),%xmm10
- pxor %xmm10,%xmm5
- movdqu %xmm5,48(%r13)
- je L$ctr_enc_done
- movdqu 64(%r12),%xmm11
- pxor %xmm11,%xmm2
- movdqu %xmm2,64(%r13)
- cmpq $6,%r14
- jb L$ctr_enc_done
- movdqu 80(%r12),%xmm12
- pxor %xmm12,%xmm6
- movdqu %xmm6,80(%r13)
- je L$ctr_enc_done
- movdqu 96(%r12),%xmm13
- pxor %xmm13,%xmm1
- movdqu %xmm1,96(%r13)
-
-
-
-L$ctr_enc_done:
- leaq (%rsp),%rax
- pxor %xmm0,%xmm0
-L$ctr_enc_bzero:
- movdqa %xmm0,0(%rax)
- movdqa %xmm0,16(%rax)
- leaq 32(%rax),%rax
- cmpq %rax,%rbp
- ja L$ctr_enc_bzero
-
- leaq 120(%rbp),%rax
-
- movq -48(%rax),%r15
-
- movq -40(%rax),%r14
-
- movq -32(%rax),%r13
-
- movq -24(%rax),%r12
-
- movq -16(%rax),%rbx
-
- movq -8(%rax),%rbp
-
- leaq (%rax),%rsp
-
-L$ctr_enc_epilogue:
- .byte 0xf3,0xc3
-
-
-
-.p2align 6
-_bsaes_const:
-L$M0ISR:
-.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
-L$ISRM0:
-.quad 0x01040b0e0205080f, 0x0306090c00070a0d
-L$ISR:
-.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
-L$BS0:
-.quad 0x5555555555555555, 0x5555555555555555
-L$BS1:
-.quad 0x3333333333333333, 0x3333333333333333
-L$BS2:
-.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
-L$SR:
-.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
-L$SRM0:
-.quad 0x0304090e00050a0f, 0x01060b0c0207080d
-L$M0SR:
-.quad 0x0a0e02060f03070b, 0x0004080c05090d01
-L$SWPUP:
-.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
-L$SWPUPM0SR:
-.quad 0x0a0d02060c03070b, 0x0004080f05090e01
-L$ADD1:
-.quad 0x0000000000000000, 0x0000000100000000
-L$ADD2:
-.quad 0x0000000000000000, 0x0000000200000000
-L$ADD3:
-.quad 0x0000000000000000, 0x0000000300000000
-L$ADD4:
-.quad 0x0000000000000000, 0x0000000400000000
-L$ADD5:
-.quad 0x0000000000000000, 0x0000000500000000
-L$ADD6:
-.quad 0x0000000000000000, 0x0000000600000000
-L$ADD7:
-.quad 0x0000000000000000, 0x0000000700000000
-L$ADD8:
-.quad 0x0000000000000000, 0x0000000800000000
-L$xts_magic:
-.long 0x87,0,1,0
-L$masks:
-.quad 0x0101010101010101, 0x0101010101010101
-.quad 0x0202020202020202, 0x0202020202020202
-.quad 0x0404040404040404, 0x0404040404040404
-.quad 0x0808080808080808, 0x0808080808080808
-L$M0:
-.quad 0x02060a0e03070b0f, 0x0004080c0105090d
-L$63:
-.quad 0x6363636363636363, 0x6363636363636363
-.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0
-.p2align 6
-
-#endif
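
The constants table that closes the deleted file (L$BS0, L$BS1, L$BS2 = 0x5555..., 0x3333..., 0x0f0f...) feeds the bit-slicing transpose: repeated "swapmove" delta-swaps with shift amounts 1, 2 and 4, the same primitive the swapmove/bitslice helpers in the Perl source removed further down generate. A minimal C model of that step, offered only as a sketch on 64-bit lanes; the real code works on 128-bit XMM registers and interleaves two swaps at a time:

    #include <stdint.h>

    /* Sketch only: the "swapmove" delta-swap that the .LBS0/.LBS1/.LBS2 masks
     * above are built for, modelled on 64-bit words instead of XMM registers. */
    static void swapmove(uint64_t *a, uint64_t *b, unsigned n, uint64_t mask) {
      uint64_t t = ((*b >> n) ^ *a) & mask;  /* differing bits, already shifted into place */
      *a ^= t;                               /* move them into a ... */
      *b ^= t << n;                          /* ... and out of b */
    }

    /* Applying this with (1, 0x5555...), (2, 0x3333...) and (4, 0x0f0f...)
     * across eight state words transposes bytes into bit planes. */
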
diff --git a/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S b/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S
index 85916188..2f60a22c 100644
--- a/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S
+++ b/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S
@@ -121,6 +121,181 @@ L$enc_entry:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+_vpaes_encrypt_core_2x:
+
+ movq %rdx,%r9
+ movq $16,%r11
+ movl 240(%rdx),%eax
+ movdqa %xmm9,%xmm1
+ movdqa %xmm9,%xmm7
+ movdqa L$k_ipt(%rip),%xmm2
+ movdqa %xmm2,%xmm8
+ pandn %xmm0,%xmm1
+ pandn %xmm6,%xmm7
+ movdqu (%r9),%xmm5
+
+ psrld $4,%xmm1
+ psrld $4,%xmm7
+ pand %xmm9,%xmm0
+ pand %xmm9,%xmm6
+.byte 102,15,56,0,208
+.byte 102,68,15,56,0,198
+ movdqa L$k_ipt+16(%rip),%xmm0
+ movdqa %xmm0,%xmm6
+.byte 102,15,56,0,193
+.byte 102,15,56,0,247
+ pxor %xmm5,%xmm2
+ pxor %xmm5,%xmm8
+ addq $16,%r9
+ pxor %xmm2,%xmm0
+ pxor %xmm8,%xmm6
+ leaq L$k_mc_backward(%rip),%r10
+ jmp L$enc2x_entry
+
+.p2align 4
+L$enc2x_loop:
+
+ movdqa L$k_sb1(%rip),%xmm4
+ movdqa L$k_sb1+16(%rip),%xmm0
+ movdqa %xmm4,%xmm12
+ movdqa %xmm0,%xmm6
+.byte 102,15,56,0,226
+.byte 102,69,15,56,0,224
+.byte 102,15,56,0,195
+.byte 102,65,15,56,0,243
+ pxor %xmm5,%xmm4
+ pxor %xmm5,%xmm12
+ movdqa L$k_sb2(%rip),%xmm5
+ movdqa %xmm5,%xmm13
+ pxor %xmm4,%xmm0
+ pxor %xmm12,%xmm6
+ movdqa -64(%r11,%r10,1),%xmm1
+
+.byte 102,15,56,0,234
+.byte 102,69,15,56,0,232
+ movdqa (%r11,%r10,1),%xmm4
+
+ movdqa L$k_sb2+16(%rip),%xmm2
+ movdqa %xmm2,%xmm8
+.byte 102,15,56,0,211
+.byte 102,69,15,56,0,195
+ movdqa %xmm0,%xmm3
+ movdqa %xmm6,%xmm11
+ pxor %xmm5,%xmm2
+ pxor %xmm13,%xmm8
+.byte 102,15,56,0,193
+.byte 102,15,56,0,241
+ addq $16,%r9
+ pxor %xmm2,%xmm0
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,220
+.byte 102,68,15,56,0,220
+ addq $16,%r11
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm11
+.byte 102,15,56,0,193
+.byte 102,15,56,0,241
+ andq $0x30,%r11
+ subq $1,%rax
+ pxor %xmm3,%xmm0
+ pxor %xmm11,%xmm6
+
+L$enc2x_entry:
+
+ movdqa %xmm9,%xmm1
+ movdqa %xmm9,%xmm7
+ movdqa L$k_inv+16(%rip),%xmm5
+ movdqa %xmm5,%xmm13
+ pandn %xmm0,%xmm1
+ pandn %xmm6,%xmm7
+ psrld $4,%xmm1
+ psrld $4,%xmm7
+ pand %xmm9,%xmm0
+ pand %xmm9,%xmm6
+.byte 102,15,56,0,232
+.byte 102,68,15,56,0,238
+ movdqa %xmm10,%xmm3
+ movdqa %xmm10,%xmm11
+ pxor %xmm1,%xmm0
+ pxor %xmm7,%xmm6
+.byte 102,15,56,0,217
+.byte 102,68,15,56,0,223
+ movdqa %xmm10,%xmm4
+ movdqa %xmm10,%xmm12
+ pxor %xmm5,%xmm3
+ pxor %xmm13,%xmm11
+.byte 102,15,56,0,224
+.byte 102,68,15,56,0,230
+ movdqa %xmm10,%xmm2
+ movdqa %xmm10,%xmm8
+ pxor %xmm5,%xmm4
+ pxor %xmm13,%xmm12
+.byte 102,15,56,0,211
+.byte 102,69,15,56,0,195
+ movdqa %xmm10,%xmm3
+ movdqa %xmm10,%xmm11
+ pxor %xmm0,%xmm2
+ pxor %xmm6,%xmm8
+.byte 102,15,56,0,220
+.byte 102,69,15,56,0,220
+ movdqu (%r9),%xmm5
+
+ pxor %xmm1,%xmm3
+ pxor %xmm7,%xmm11
+ jnz L$enc2x_loop
+
+
+ movdqa -96(%r10),%xmm4
+ movdqa -80(%r10),%xmm0
+ movdqa %xmm4,%xmm12
+ movdqa %xmm0,%xmm6
+.byte 102,15,56,0,226
+.byte 102,69,15,56,0,224
+ pxor %xmm5,%xmm4
+ pxor %xmm5,%xmm12
+.byte 102,15,56,0,195
+.byte 102,65,15,56,0,243
+ movdqa 64(%r11,%r10,1),%xmm1
+
+ pxor %xmm4,%xmm0
+ pxor %xmm12,%xmm6
+.byte 102,15,56,0,193
+.byte 102,15,56,0,241
+ .byte 0xf3,0xc3
+
+
+
+
+
+
+
+
+
.p2align 4
_vpaes_decrypt_core:
@@ -757,6 +932,69 @@ L$cbc_abort:
.byte 0xf3,0xc3
+.globl _vpaes_ctr32_encrypt_blocks
+.private_extern _vpaes_ctr32_encrypt_blocks
+
+.p2align 4
+_vpaes_ctr32_encrypt_blocks:
+
+
+ xchgq %rcx,%rdx
+ testq %rcx,%rcx
+ jz L$ctr32_abort
+ movdqu (%r8),%xmm0
+ movdqa L$ctr_add_one(%rip),%xmm8
+ subq %rdi,%rsi
+ call _vpaes_preheat
+ movdqa %xmm0,%xmm6
+ pshufb L$rev_ctr(%rip),%xmm6
+
+ testq $1,%rcx
+ jz L$ctr32_prep_loop
+
+
+
+ movdqu (%rdi),%xmm7
+ call _vpaes_encrypt_core
+ pxor %xmm7,%xmm0
+ paddd %xmm8,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ subq $1,%rcx
+ leaq 16(%rdi),%rdi
+ jz L$ctr32_done
+
+L$ctr32_prep_loop:
+
+
+ movdqa %xmm6,%xmm14
+ movdqa %xmm6,%xmm15
+ paddd %xmm8,%xmm15
+
+L$ctr32_loop:
+ movdqa L$rev_ctr(%rip),%xmm1
+ movdqa %xmm14,%xmm0
+ movdqa %xmm15,%xmm6
+.byte 102,15,56,0,193
+.byte 102,15,56,0,241
+ call _vpaes_encrypt_core_2x
+ movdqu (%rdi),%xmm1
+ movdqu 16(%rdi),%xmm2
+ movdqa L$ctr_add_two(%rip),%xmm3
+ pxor %xmm1,%xmm0
+ pxor %xmm2,%xmm6
+ paddd %xmm3,%xmm14
+ paddd %xmm3,%xmm15
+ movdqu %xmm0,(%rsi,%rdi,1)
+ movdqu %xmm6,16(%rsi,%rdi,1)
+ subq $2,%rcx
+ leaq 32(%rdi),%rdi
+ jnz L$ctr32_loop
+
+L$ctr32_done:
+L$ctr32_abort:
+ .byte 0xf3,0xc3
+
+
@@ -879,6 +1117,17 @@ L$k_dsbe:
L$k_dsbo:
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+
+
+L$rev_ctr:
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+
+
+L$ctr_add_one:
+.quad 0x0000000000000000, 0x0000000100000000
+L$ctr_add_two:
+.quad 0x0000000000000000, 0x0000000200000000
+
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.p2align 6
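
The new _vpaes_ctr32_encrypt_blocks keeps a byte-reversed copy of the IV (L$rev_ctr) so the 32-bit counter word can be advanced with a plain paddd against L$ctr_add_one and L$ctr_add_two; an odd leading block goes through the single-block core, after which the loop strides two blocks at a time through the interleaved _vpaes_encrypt_core_2x. A rough C model of the counter arithmetic only, assuming the usual AES-CTR convention of a big-endian counter in the last four bytes of the block (the helper name is made up for illustration):

    #include <stdint.h>

    /* Advance the low 32-bit big-endian counter word by n blocks, wrapping at
     * 2^32 just as a paddd on one lane does. Illustrative only; the assembly
     * above achieves the same effect with pshufb + paddd. */
    static void ctr32_add(uint8_t block[16], uint32_t n) {
      uint32_t c = ((uint32_t)block[12] << 24) | ((uint32_t)block[13] << 16) |
                   ((uint32_t)block[14] << 8) | (uint32_t)block[15];
      c += n;
      block[15] = (uint8_t)c;
      block[14] = (uint8_t)(c >> 8);
      block[13] = (uint8_t)(c >> 16);
      block[12] = (uint8_t)(c >> 24);
    }
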
diff --git a/sources.bp b/sources.bp
index 25e406fa..de4e55b2 100644
--- a/sources.bp
+++ b/sources.bp
@@ -299,7 +299,6 @@ cc_defaults {
"linux-x86_64/crypto/fipsmodule/aes-x86_64.S",
"linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
"linux-x86_64/crypto/fipsmodule/aesni-x86_64.S",
- "linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S",
"linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S",
"linux-x86_64/crypto/fipsmodule/ghash-x86_64.S",
"linux-x86_64/crypto/fipsmodule/md5-x86_64.S",
diff --git a/sources.mk b/sources.mk
index 617ac2ad..6bb17705 100644
--- a/sources.mk
+++ b/sources.mk
@@ -293,7 +293,6 @@ linux_x86_64_sources := \
linux-x86_64/crypto/fipsmodule/aes-x86_64.S\
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S\
linux-x86_64/crypto/fipsmodule/aesni-x86_64.S\
- linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S\
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S\
linux-x86_64/crypto/fipsmodule/ghash-x86_64.S\
linux-x86_64/crypto/fipsmodule/md5-x86_64.S\
diff --git a/src/crypto/asn1/a_int.c b/src/crypto/asn1/a_int.c
index 6dc18bad..7b483f2d 100644
--- a/src/crypto/asn1/a_int.c
+++ b/src/crypto/asn1/a_int.c
@@ -195,6 +195,16 @@ ASN1_INTEGER *c2i_ASN1_INTEGER(ASN1_INTEGER **a, const unsigned char **pp,
unsigned char *to, *s;
int i;
+ /*
+ * This function can handle lengths up to INT_MAX - 1, but the rest of the
+ * legacy ASN.1 code mixes integer types, so avoid exposing it to
+ * ASN1_INTEGERS with larger lengths.
+ */
+ if (len < 0 || len > INT_MAX / 2) {
+ OPENSSL_PUT_ERROR(ASN1, ASN1_R_TOO_LONG);
+ return NULL;
+ }
+
if ((a == NULL) || ((*a) == NULL)) {
if ((ret = M_ASN1_INTEGER_new()) == NULL)
return (NULL);
@@ -276,75 +286,6 @@ ASN1_INTEGER *c2i_ASN1_INTEGER(ASN1_INTEGER **a, const unsigned char **pp,
return (NULL);
}
-/*
- * This is a version of d2i_ASN1_INTEGER that ignores the sign bit of ASN1
- * integers: some broken software can encode a positive INTEGER with its MSB
- * set as negative (it doesn't add a padding zero).
- */
-
-ASN1_INTEGER *d2i_ASN1_UINTEGER(ASN1_INTEGER **a, const unsigned char **pp,
- long length)
-{
- ASN1_INTEGER *ret = NULL;
- const unsigned char *p;
- unsigned char *s;
- long len;
- int inf, tag, xclass;
- int i;
-
- if ((a == NULL) || ((*a) == NULL)) {
- if ((ret = M_ASN1_INTEGER_new()) == NULL)
- return (NULL);
- ret->type = V_ASN1_INTEGER;
- } else
- ret = (*a);
-
- p = *pp;
- inf = ASN1_get_object(&p, &len, &tag, &xclass, length);
- if (inf & 0x80) {
- i = ASN1_R_BAD_OBJECT_HEADER;
- goto err;
- }
-
- if (tag != V_ASN1_INTEGER) {
- i = ASN1_R_EXPECTING_AN_INTEGER;
- goto err;
- }
-
- /*
- * We must OPENSSL_malloc stuff, even for 0 bytes otherwise it signifies
- * a missing NULL parameter.
- */
- s = (unsigned char *)OPENSSL_malloc((int)len + 1);
- if (s == NULL) {
- i = ERR_R_MALLOC_FAILURE;
- goto err;
- }
- ret->type = V_ASN1_INTEGER;
- if (len) {
- if ((*p == 0) && (len != 1)) {
- p++;
- len--;
- }
- OPENSSL_memcpy(s, p, (int)len);
- p += len;
- }
-
- if (ret->data != NULL)
- OPENSSL_free(ret->data);
- ret->data = s;
- ret->length = (int)len;
- if (a != NULL)
- (*a) = ret;
- *pp = p;
- return (ret);
- err:
- OPENSSL_PUT_ERROR(ASN1, i);
- if ((ret != NULL) && ((a == NULL) || (*a != ret)))
- M_ASN1_INTEGER_free(ret);
- return (NULL);
-}
-
int ASN1_INTEGER_set(ASN1_INTEGER *a, long v)
{
if (v >= 0) {
diff --git a/src/crypto/asn1/asn1_lib.c b/src/crypto/asn1/asn1_lib.c
index ea727f33..8526aba3 100644
--- a/src/crypto/asn1/asn1_lib.c
+++ b/src/crypto/asn1/asn1_lib.c
@@ -205,7 +205,11 @@ static int asn1_get_length(const unsigned char **pp, int *inf, long *rl,
} else
ret = i;
}
- if (ret > LONG_MAX)
+ /*
+ * Bound the length to comfortably fit in an int. Lengths in this module
+ * often switch between int and long without overflow checks.
+ */
+ if (ret > INT_MAX / 2)
return 0;
*pp = p;
*rl = (long)ret;
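
Both guards added in this change, in c2i_ASN1_INTEGER above and in asn1_get_length here, cap ASN.1 lengths at INT_MAX / 2 because the legacy code mixes int and long without overflow checks. A minimal sketch of the shared bound they enforce (not a library API, just the predicate):

    #include <limits.h>

    /* Reject lengths that the int/long-mixing legacy ASN.1 code cannot
     * handle safely; mirrors the two checks introduced in this change. */
    static int asn1_length_in_bounds(long len) {
      return len >= 0 && len <= INT_MAX / 2;
    }
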
diff --git a/src/crypto/cipher_extra/e_aesccm.c b/src/crypto/cipher_extra/e_aesccm.c
index 4e6668c0..3e186593 100644
--- a/src/crypto/cipher_extra/e_aesccm.c
+++ b/src/crypto/cipher_extra/e_aesccm.c
@@ -66,8 +66,7 @@ static int aead_aes_ccm_init(EVP_AEAD_CTX *ctx, const uint8_t *key,
struct aead_aes_ccm_ctx *ccm_ctx = (struct aead_aes_ccm_ctx *)&ctx->state;
block128_f block;
- ctr128_f ctr = aes_ctr_set_key(&ccm_ctx->ks.ks, NULL, &block, key, key_len,
- 0 /* small inputs */);
+ ctr128_f ctr = aes_ctr_set_key(&ccm_ctx->ks.ks, NULL, &block, key, key_len);
ctx->tag_len = tag_len;
if (!CRYPTO_ccm128_init(&ccm_ctx->ccm, &ccm_ctx->ks.ks, block, ctr, M, L)) {
OPENSSL_PUT_ERROR(CIPHER, ERR_R_INTERNAL_ERROR);
diff --git a/src/crypto/cipher_extra/e_aesctrhmac.c b/src/crypto/cipher_extra/e_aesctrhmac.c
index 0834bd1d..8c45c811 100644
--- a/src/crypto/cipher_extra/e_aesctrhmac.c
+++ b/src/crypto/cipher_extra/e_aesctrhmac.c
@@ -94,8 +94,8 @@ static int aead_aes_ctr_hmac_sha256_init(EVP_AEAD_CTX *ctx, const uint8_t *key,
return 0;
}
- aes_ctx->ctr = aes_ctr_set_key(&aes_ctx->ks.ks, NULL, &aes_ctx->block, key,
- aes_key_len, 1 /* large inputs */);
+ aes_ctx->ctr =
+ aes_ctr_set_key(&aes_ctx->ks.ks, NULL, &aes_ctx->block, key, aes_key_len);
ctx->tag_len = tag_len;
hmac_init(&aes_ctx->inner_init_state, &aes_ctx->outer_init_state,
key + aes_key_len);
diff --git a/src/crypto/cipher_extra/e_aesgcmsiv.c b/src/crypto/cipher_extra/e_aesgcmsiv.c
index fb08a428..71a71fac 100644
--- a/src/crypto/cipher_extra/e_aesgcmsiv.c
+++ b/src/crypto/cipher_extra/e_aesgcmsiv.c
@@ -595,7 +595,7 @@ static int aead_aes_gcm_siv_init(EVP_AEAD_CTX *ctx, const uint8_t *key,
OPENSSL_memset(gcm_siv_ctx, 0, sizeof(struct aead_aes_gcm_siv_ctx));
aes_ctr_set_key(&gcm_siv_ctx->ks.ks, NULL, &gcm_siv_ctx->kgk_block, key,
- key_len, 0 /* small inputs */);
+ key_len);
gcm_siv_ctx->is_256 = (key_len == 32);
ctx->tag_len = tag_len;
@@ -719,8 +719,7 @@ static void gcm_siv_keys(
OPENSSL_memcpy(out_keys->auth_key, key_material, 16);
aes_ctr_set_key(&out_keys->enc_key.ks, NULL, &out_keys->enc_block,
- key_material + 16, gcm_siv_ctx->is_256 ? 32 : 16,
- 0 /* small inputs */);
+ key_material + 16, gcm_siv_ctx->is_256 ? 32 : 16);
}
static int aead_aes_gcm_siv_seal_scatter(
diff --git a/src/crypto/fipsmodule/CMakeLists.txt b/src/crypto/fipsmodule/CMakeLists.txt
index fbf25ac8..d1e2cb9d 100644
--- a/src/crypto/fipsmodule/CMakeLists.txt
+++ b/src/crypto/fipsmodule/CMakeLists.txt
@@ -7,7 +7,6 @@ if(${ARCH} STREQUAL "x86_64")
aesni-gcm-x86_64.${ASM_EXT}
aesni-x86_64.${ASM_EXT}
aes-x86_64.${ASM_EXT}
- bsaes-x86_64.${ASM_EXT}
ghash-ssse3-x86_64.${ASM_EXT}
ghash-x86_64.${ASM_EXT}
md5-x86_64.${ASM_EXT}
@@ -95,7 +94,6 @@ perlasm(armv4-mont.${ASM_EXT} bn/asm/armv4-mont.pl)
perlasm(armv8-mont.${ASM_EXT} bn/asm/armv8-mont.pl)
perlasm(bn-586.${ASM_EXT} bn/asm/bn-586.pl)
perlasm(bsaes-armv7.${ASM_EXT} aes/asm/bsaes-armv7.pl)
-perlasm(bsaes-x86_64.${ASM_EXT} aes/asm/bsaes-x86_64.pl)
perlasm(co-586.${ASM_EXT} bn/asm/co-586.pl)
perlasm(ghash-armv4.${ASM_EXT} modes/asm/ghash-armv4.pl)
perlasm(ghashp8-ppc.${ASM_EXT} modes/asm/ghashp8-ppc.pl)
diff --git a/src/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl b/src/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
deleted file mode 100644
index 3bb28190..00000000
--- a/src/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
+++ /dev/null
@@ -1,3227 +0,0 @@
-#! /usr/bin/env perl
-# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License"). You may not use
-# this file except in compliance with the License. You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
-
-###################################################################
-### AES-128 [originally in CTR mode] ###
-### bitsliced implementation for Intel Core 2 processors ###
-### requires support of SSE extensions up to SSSE3 ###
-### Author: Emilia Käsper and Peter Schwabe ###
-### Date: 2009-03-19 ###
-### Public domain ###
-### ###
-### See http://homes.esat.kuleuven.be/~ekasper/#software for ###
-### further information. ###
-###################################################################
-#
-# September 2011.
-#
-# Started as transliteration to "perlasm" the original code has
-# undergone following changes:
-#
-# - code was made position-independent;
-# - rounds were folded into a loop resulting in >5x size reduction
-# from 12.5KB to 2.2KB;
-# - above was possibile thanks to mixcolumns() modification that
-# allowed to feed its output back to aesenc[last], this was
-# achieved at cost of two additional inter-registers moves;
-# - some instruction reordering and interleaving;
-# - this module doesn't implement key setup subroutine, instead it
-# relies on conversion of "conventional" key schedule as returned
-# by AES_set_encrypt_key (see discussion below);
-# - first and last round keys are treated differently, which allowed
-# to skip one shiftrows(), reduce bit-sliced key schedule and
-# speed-up conversion by 22%;
-# - support for 192- and 256-bit keys was added;
-#
-# Resulting performance in CPU cycles spent to encrypt one byte out
-# of 4096-byte buffer with 128-bit key is:
-#
-# Emilia's this(*) difference
-#
-# Core 2 9.30 8.69 +7%
-# Nehalem(**) 7.63 6.88 +11%
-# Atom 17.1 16.4 +4%
-# Silvermont - 12.9
-# Goldmont - 8.85
-#
-# (*) Comparison is not completely fair, because "this" is ECB,
-# i.e. no extra processing such as counter values calculation
-# and xor-ing input as in Emilia's CTR implementation is
-# performed. However, the CTR calculations stand for not more
-# than 1% of total time, so comparison is *rather* fair.
-#
-# (**) Results were collected on Westmere, which is considered to
-# be equivalent to Nehalem for this code.
-#
-# As for key schedule conversion subroutine. Interface to OpenSSL
-# relies on per-invocation on-the-fly conversion. This naturally
-# has impact on performance, especially for short inputs. Conversion
-# time in CPU cycles and its ratio to CPU cycles spent in 8x block
-# function is:
-#
-# conversion conversion/8x block
-# Core 2 240 0.22
-# Nehalem 180 0.20
-# Atom 430 0.20
-#
-# The ratio values mean that 128-byte blocks will be processed
-# 16-18% slower, 256-byte blocks - 9-10%, 384-byte blocks - 6-7%,
-# etc. Then keep in mind that input sizes not divisible by 128 are
-# *effectively* slower, especially shortest ones, e.g. consecutive
-# 144-byte blocks are processed 44% slower than one would expect,
-# 272 - 29%, 400 - 22%, etc. Yet, despite all these "shortcomings"
-# it's still faster than ["hyper-threading-safe" code path in]
-# aes-x86_64.pl on all lengths above 64 bytes...
-#
-# October 2011.
-#
-# Add decryption procedure. Performance in CPU cycles spent to decrypt
-# one byte out of 4096-byte buffer with 128-bit key is:
-#
-# Core 2 9.98
-# Nehalem 7.80
-# Atom 17.9
-# Silvermont 14.0
-# Goldmont 10.2
-#
-# November 2011.
-#
-# Add bsaes_xts_[en|de]crypt. Less-than-80-bytes-block performance is
-# suboptimal, but XTS is meant to be used with larger blocks...
-#
-# <appro@openssl.org>
-
-$flavour = shift;
-$output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-
-$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
-die "can't locate x86_64-xlate.pl";
-
-open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
-*STDOUT=*OUT;
-
-my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx");
-my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15)
-my $ecb=0; # suppress unreferenced ECB subroutines, spare some space...
-my $xts=0; # Also patch out the XTS subroutines.
-
-{
-my ($key,$rounds,$const)=("%rax","%r10d","%r11");
-
-sub Sbox {
-# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
-# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb
-my @b=@_[0..7];
-my @t=@_[8..11];
-my @s=@_[12..15];
- &InBasisChange (@b);
- &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s);
- &OutBasisChange (@b[7,1,4,2,6,5,0,3]);
-}
-
-sub InBasisChange {
-# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
-# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
-my @b=@_[0..7];
-$code.=<<___;
- pxor @b[6], @b[5]
- pxor @b[1], @b[2]
- pxor @b[0], @b[3]
- pxor @b[2], @b[6]
- pxor @b[0], @b[5]
-
- pxor @b[3], @b[6]
- pxor @b[7], @b[3]
- pxor @b[5], @b[7]
- pxor @b[4], @b[3]
- pxor @b[5], @b[4]
- pxor @b[1], @b[3]
-
- pxor @b[7], @b[2]
- pxor @b[5], @b[1]
-___
-}
-
-sub OutBasisChange {
-# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
-# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb
-my @b=@_[0..7];
-$code.=<<___;
- pxor @b[6], @b[0]
- pxor @b[4], @b[1]
- pxor @b[0], @b[2]
- pxor @b[6], @b[4]
- pxor @b[1], @b[6]
-
- pxor @b[5], @b[1]
- pxor @b[3], @b[5]
- pxor @b[7], @b[3]
- pxor @b[5], @b[7]
- pxor @b[5], @b[2]
-
- pxor @b[7], @b[4]
-___
-}
-
-sub InvSbox {
-# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
-# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb
-my @b=@_[0..7];
-my @t=@_[8..11];
-my @s=@_[12..15];
- &InvInBasisChange (@b);
- &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s);
- &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]);
-}
-
-sub InvInBasisChange { # OutBasisChange in reverse
-my @b=@_[5,1,2,6,3,7,0,4];
-$code.=<<___
- pxor @b[7], @b[4]
-
- pxor @b[5], @b[7]
- pxor @b[5], @b[2]
- pxor @b[7], @b[3]
- pxor @b[3], @b[5]
- pxor @b[5], @b[1]
-
- pxor @b[1], @b[6]
- pxor @b[0], @b[2]
- pxor @b[6], @b[4]
- pxor @b[6], @b[0]
- pxor @b[4], @b[1]
-___
-}
-
-sub InvOutBasisChange { # InBasisChange in reverse
-my @b=@_[2,5,7,3,6,1,0,4];
-$code.=<<___;
- pxor @b[5], @b[1]
- pxor @b[7], @b[2]
-
- pxor @b[1], @b[3]
- pxor @b[5], @b[4]
- pxor @b[5], @b[7]
- pxor @b[4], @b[3]
- pxor @b[0], @b[5]
- pxor @b[7], @b[3]
- pxor @b[2], @b[6]
- pxor @b[1], @b[2]
- pxor @b[3], @b[6]
-
- pxor @b[0], @b[3]
- pxor @b[6], @b[5]
-___
-}
-
-sub Mul_GF4 {
-#;*************************************************************
-#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) *
-#;*************************************************************
-my ($x0,$x1,$y0,$y1,$t0)=@_;
-$code.=<<___;
- movdqa $y0, $t0
- pxor $y1, $t0
- pand $x0, $t0
- pxor $x1, $x0
- pand $y0, $x1
- pand $y1, $x0
- pxor $x1, $x0
- pxor $t0, $x1
-___
-}
-
-sub Mul_GF4_N { # not used, see next subroutine
-# multiply and scale by N
-my ($x0,$x1,$y0,$y1,$t0)=@_;
-$code.=<<___;
- movdqa $y0, $t0
- pxor $y1, $t0
- pand $x0, $t0
- pxor $x1, $x0
- pand $y0, $x1
- pand $y1, $x0
- pxor $x0, $x1
- pxor $t0, $x0
-___
-}
-
-sub Mul_GF4_N_GF4 {
-# interleaved Mul_GF4_N and Mul_GF4
-my ($x0,$x1,$y0,$y1,$t0,
- $x2,$x3,$y2,$y3,$t1)=@_;
-$code.=<<___;
- movdqa $y0, $t0
- movdqa $y2, $t1
- pxor $y1, $t0
- pxor $y3, $t1
- pand $x0, $t0
- pand $x2, $t1
- pxor $x1, $x0
- pxor $x3, $x2
- pand $y0, $x1
- pand $y2, $x3
- pand $y1, $x0
- pand $y3, $x2
- pxor $x0, $x1
- pxor $x3, $x2
- pxor $t0, $x0
- pxor $t1, $x3
-___
-}
-sub Mul_GF16_2 {
-my @x=@_[0..7];
-my @y=@_[8..11];
-my @t=@_[12..15];
-$code.=<<___;
- movdqa @x[0], @t[0]
- movdqa @x[1], @t[1]
-___
- &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2]);
-$code.=<<___;
- pxor @x[2], @t[0]
- pxor @x[3], @t[1]
- pxor @y[2], @y[0]
- pxor @y[3], @y[1]
-___
- Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
- @x[2], @x[3], @y[2], @y[3], @t[2]);
-$code.=<<___;
- pxor @t[0], @x[0]
- pxor @t[0], @x[2]
- pxor @t[1], @x[1]
- pxor @t[1], @x[3]
-
- movdqa @x[4], @t[0]
- movdqa @x[5], @t[1]
- pxor @x[6], @t[0]
- pxor @x[7], @t[1]
-___
- &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
- @x[6], @x[7], @y[2], @y[3], @t[2]);
-$code.=<<___;
- pxor @y[2], @y[0]
- pxor @y[3], @y[1]
-___
- &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[3]);
-$code.=<<___;
- pxor @t[0], @x[4]
- pxor @t[0], @x[6]
- pxor @t[1], @x[5]
- pxor @t[1], @x[7]
-___
-}
-sub Inv_GF256 {
-#;********************************************************************
-#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) *
-#;********************************************************************
-my @x=@_[0..7];
-my @t=@_[8..11];
-my @s=@_[12..15];
-# direct optimizations from hardware
-$code.=<<___;
- movdqa @x[4], @t[3]
- movdqa @x[5], @t[2]
- movdqa @x[1], @t[1]
- movdqa @x[7], @s[1]
- movdqa @x[0], @s[0]
-
- pxor @x[6], @t[3]
- pxor @x[7], @t[2]
- pxor @x[3], @t[1]
- movdqa @t[3], @s[2]
- pxor @x[6], @s[1]
- movdqa @t[2], @t[0]
- pxor @x[2], @s[0]
- movdqa @t[3], @s[3]
-
- por @t[1], @t[2]
- por @s[0], @t[3]
- pxor @t[0], @s[3]
- pand @s[0], @s[2]
- pxor @t[1], @s[0]
- pand @t[1], @t[0]
- pand @s[0], @s[3]
- movdqa @x[3], @s[0]
- pxor @x[2], @s[0]
- pand @s[0], @s[1]
- pxor @s[1], @t[3]
- pxor @s[1], @t[2]
- movdqa @x[4], @s[1]
- movdqa @x[1], @s[0]
- pxor @x[5], @s[1]
- pxor @x[0], @s[0]
- movdqa @s[1], @t[1]
- pand @s[0], @s[1]
- por @s[0], @t[1]
- pxor @s[1], @t[0]
- pxor @s[3], @t[3]
- pxor @s[2], @t[2]
- pxor @s[3], @t[1]
- movdqa @x[7], @s[0]
- pxor @s[2], @t[0]
- movdqa @x[6], @s[1]
- pxor @s[2], @t[1]
- movdqa @x[5], @s[2]
- pand @x[3], @s[0]
- movdqa @x[4], @s[3]
- pand @x[2], @s[1]
- pand @x[1], @s[2]
- por @x[0], @s[3]
- pxor @s[0], @t[3]
- pxor @s[1], @t[2]
- pxor @s[2], @t[1]
- pxor @s[3], @t[0]
-
- #Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
-
- # new smaller inversion
-
- movdqa @t[3], @s[0]
- pand @t[1], @t[3]
- pxor @t[2], @s[0]
-
- movdqa @t[0], @s[2]
- movdqa @s[0], @s[3]
- pxor @t[3], @s[2]
- pand @s[2], @s[3]
-
- movdqa @t[1], @s[1]
- pxor @t[2], @s[3]
- pxor @t[0], @s[1]
-
- pxor @t[2], @t[3]
-
- pand @t[3], @s[1]
-
- movdqa @s[2], @t[2]
- pxor @t[0], @s[1]
-
- pxor @s[1], @t[2]
- pxor @s[1], @t[1]
-
- pand @t[0], @t[2]
-
- pxor @t[2], @s[2]
- pxor @t[2], @t[1]
-
- pand @s[3], @s[2]
-
- pxor @s[0], @s[2]
-___
-# output in s3, s2, s1, t1
-
-# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3
-
-# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
- &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]);
-
-### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb
-}
-
-# AES linear components
-
-sub ShiftRows {
-my @x=@_[0..7];
-my $mask=pop;
-$code.=<<___;
- pxor 0x00($key),@x[0]
- pxor 0x10($key),@x[1]
- pxor 0x20($key),@x[2]
- pxor 0x30($key),@x[3]
- pshufb $mask,@x[0]
- pshufb $mask,@x[1]
- pxor 0x40($key),@x[4]
- pxor 0x50($key),@x[5]
- pshufb $mask,@x[2]
- pshufb $mask,@x[3]
- pxor 0x60($key),@x[6]
- pxor 0x70($key),@x[7]
- pshufb $mask,@x[4]
- pshufb $mask,@x[5]
- pshufb $mask,@x[6]
- pshufb $mask,@x[7]
- lea 0x80($key),$key
-___
-}
-
-sub MixColumns {
-# modified to emit output in order suitable for feeding back to aesenc[last]
-my @x=@_[0..7];
-my @t=@_[8..15];
-my $inv=@_[16]; # optional
-$code.=<<___;
- pshufd \$0x93, @x[0], @t[0] # x0 <<< 32
- pshufd \$0x93, @x[1], @t[1]
- pxor @t[0], @x[0] # x0 ^ (x0 <<< 32)
- pshufd \$0x93, @x[2], @t[2]
- pxor @t[1], @x[1]
- pshufd \$0x93, @x[3], @t[3]
- pxor @t[2], @x[2]
- pshufd \$0x93, @x[4], @t[4]
- pxor @t[3], @x[3]
- pshufd \$0x93, @x[5], @t[5]
- pxor @t[4], @x[4]
- pshufd \$0x93, @x[6], @t[6]
- pxor @t[5], @x[5]
- pshufd \$0x93, @x[7], @t[7]
- pxor @t[6], @x[6]
- pxor @t[7], @x[7]
-
- pxor @x[0], @t[1]
- pxor @x[7], @t[0]
- pxor @x[7], @t[1]
- pshufd \$0x4E, @x[0], @x[0] # (x0 ^ (x0 <<< 32)) <<< 64)
- pxor @x[1], @t[2]
- pshufd \$0x4E, @x[1], @x[1]
- pxor @x[4], @t[5]
- pxor @t[0], @x[0]
- pxor @x[5], @t[6]
- pxor @t[1], @x[1]
- pxor @x[3], @t[4]
- pshufd \$0x4E, @x[4], @t[0]
- pxor @x[6], @t[7]
- pshufd \$0x4E, @x[5], @t[1]
- pxor @x[2], @t[3]
- pshufd \$0x4E, @x[3], @x[4]
- pxor @x[7], @t[3]
- pshufd \$0x4E, @x[7], @x[5]
- pxor @x[7], @t[4]
- pshufd \$0x4E, @x[6], @x[3]
- pxor @t[4], @t[0]
- pshufd \$0x4E, @x[2], @x[6]
- pxor @t[5], @t[1]
-___
-$code.=<<___ if (!$inv);
- pxor @t[3], @x[4]
- pxor @t[7], @x[5]
- pxor @t[6], @x[3]
- movdqa @t[0], @x[2]
- pxor @t[2], @x[6]
- movdqa @t[1], @x[7]
-___
-$code.=<<___ if ($inv);
- pxor @x[4], @t[3]
- pxor @t[7], @x[5]
- pxor @x[3], @t[6]
- movdqa @t[0], @x[3]
- pxor @t[2], @x[6]
- movdqa @t[6], @x[2]
- movdqa @t[1], @x[7]
- movdqa @x[6], @x[4]
- movdqa @t[3], @x[6]
-___
-}
-
-sub InvMixColumns_orig {
-my @x=@_[0..7];
-my @t=@_[8..15];
-
-$code.=<<___;
- # multiplication by 0x0e
- pshufd \$0x93, @x[7], @t[7]
- movdqa @x[2], @t[2]
- pxor @x[5], @x[7] # 7 5
- pxor @x[5], @x[2] # 2 5
- pshufd \$0x93, @x[0], @t[0]
- movdqa @x[5], @t[5]
- pxor @x[0], @x[5] # 5 0 [1]
- pxor @x[1], @x[0] # 0 1
- pshufd \$0x93, @x[1], @t[1]
- pxor @x[2], @x[1] # 1 25
- pxor @x[6], @x[0] # 01 6 [2]
- pxor @x[3], @x[1] # 125 3 [4]
- pshufd \$0x93, @x[3], @t[3]
- pxor @x[0], @x[2] # 25 016 [3]
- pxor @x[7], @x[3] # 3 75
- pxor @x[6], @x[7] # 75 6 [0]
- pshufd \$0x93, @x[6], @t[6]
- movdqa @x[4], @t[4]
- pxor @x[4], @x[6] # 6 4
- pxor @x[3], @x[4] # 4 375 [6]
- pxor @x[7], @x[3] # 375 756=36
- pxor @t[5], @x[6] # 64 5 [7]
- pxor @t[2], @x[3] # 36 2
- pxor @t[4], @x[3] # 362 4 [5]
- pshufd \$0x93, @t[5], @t[5]
-___
- my @y = @x[7,5,0,2,1,3,4,6];
-$code.=<<___;
- # multiplication by 0x0b
- pxor @y[0], @y[1]
- pxor @t[0], @y[0]
- pxor @t[1], @y[1]
- pshufd \$0x93, @t[2], @t[2]
- pxor @t[5], @y[0]
- pxor @t[6], @y[1]
- pxor @t[7], @y[0]
- pshufd \$0x93, @t[4], @t[4]
- pxor @t[6], @t[7] # clobber t[7]
- pxor @y[0], @y[1]
-
- pxor @t[0], @y[3]
- pshufd \$0x93, @t[0], @t[0]
- pxor @t[1], @y[2]
- pxor @t[1], @y[4]
- pxor @t[2], @y[2]
- pshufd \$0x93, @t[1], @t[1]
- pxor @t[2], @y[3]
- pxor @t[2], @y[5]
- pxor @t[7], @y[2]
- pshufd \$0x93, @t[2], @t[2]
- pxor @t[3], @y[3]
- pxor @t[3], @y[6]
- pxor @t[3], @y[4]
- pshufd \$0x93, @t[3], @t[3]
- pxor @t[4], @y[7]
- pxor @t[4], @y[5]
- pxor @t[7], @y[7]
- pxor @t[5], @y[3]
- pxor @t[4], @y[4]
- pxor @t[5], @t[7] # clobber t[7] even more
-
- pxor @t[7], @y[5]
- pshufd \$0x93, @t[4], @t[4]
- pxor @t[7], @y[6]
- pxor @t[7], @y[4]
-
- pxor @t[5], @t[7]
- pshufd \$0x93, @t[5], @t[5]
- pxor @t[6], @t[7] # restore t[7]
-
- # multiplication by 0x0d
- pxor @y[7], @y[4]
- pxor @t[4], @y[7]
- pshufd \$0x93, @t[6], @t[6]
- pxor @t[0], @y[2]
- pxor @t[5], @y[7]
- pxor @t[2], @y[2]
- pshufd \$0x93, @t[7], @t[7]
-
- pxor @y[1], @y[3]
- pxor @t[1], @y[1]
- pxor @t[0], @y[0]
- pxor @t[0], @y[3]
- pxor @t[5], @y[1]
- pxor @t[5], @y[0]
- pxor @t[7], @y[1]
- pshufd \$0x93, @t[0], @t[0]
- pxor @t[6], @y[0]
- pxor @y[1], @y[3]
- pxor @t[1], @y[4]
- pshufd \$0x93, @t[1], @t[1]
-
- pxor @t[7], @y[7]
- pxor @t[2], @y[4]
- pxor @t[2], @y[5]
- pshufd \$0x93, @t[2], @t[2]
- pxor @t[6], @y[2]
- pxor @t[3], @t[6] # clobber t[6]
- pxor @y[7], @y[4]
- pxor @t[6], @y[3]
-
- pxor @t[6], @y[6]
- pxor @t[5], @y[5]
- pxor @t[4], @y[6]
- pshufd \$0x93, @t[4], @t[4]
- pxor @t[6], @y[5]
- pxor @t[7], @y[6]
- pxor @t[3], @t[6] # restore t[6]
-
- pshufd \$0x93, @t[5], @t[5]
- pshufd \$0x93, @t[6], @t[6]
- pshufd \$0x93, @t[7], @t[7]
- pshufd \$0x93, @t[3], @t[3]
-
- # multiplication by 0x09
- pxor @y[1], @y[4]
- pxor @y[1], @t[1] # t[1]=y[1]
- pxor @t[5], @t[0] # clobber t[0]
- pxor @t[5], @t[1]
- pxor @t[0], @y[3]
- pxor @y[0], @t[0] # t[0]=y[0]
- pxor @t[6], @t[1]
- pxor @t[7], @t[6] # clobber t[6]
- pxor @t[1], @y[4]
- pxor @t[4], @y[7]
- pxor @y[4], @t[4] # t[4]=y[4]
- pxor @t[3], @y[6]
- pxor @y[3], @t[3] # t[3]=y[3]
- pxor @t[2], @y[5]
- pxor @y[2], @t[2] # t[2]=y[2]
- pxor @t[7], @t[3]
- pxor @y[5], @t[5] # t[5]=y[5]
- pxor @t[6], @t[2]
- pxor @t[6], @t[5]
- pxor @y[6], @t[6] # t[6]=y[6]
- pxor @y[7], @t[7] # t[7]=y[7]
-
- movdqa @t[0],@XMM[0]
- movdqa @t[1],@XMM[1]
- movdqa @t[2],@XMM[2]
- movdqa @t[3],@XMM[3]
- movdqa @t[4],@XMM[4]
- movdqa @t[5],@XMM[5]
- movdqa @t[6],@XMM[6]
- movdqa @t[7],@XMM[7]
-___
-}
-
-sub InvMixColumns {
-my @x=@_[0..7];
-my @t=@_[8..15];
-
-# Thanks to Jussi Kivilinna for providing pointer to
-#
-# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 |
-# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
-# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 |
-# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 |
-
-$code.=<<___;
- # multiplication by 0x05-0x00-0x04-0x00
- pshufd \$0x4E, @x[0], @t[0]
- pshufd \$0x4E, @x[6], @t[6]
- pxor @x[0], @t[0]
- pshufd \$0x4E, @x[7], @t[7]
- pxor @x[6], @t[6]
- pshufd \$0x4E, @x[1], @t[1]
- pxor @x[7], @t[7]
- pshufd \$0x4E, @x[2], @t[2]
- pxor @x[1], @t[1]
- pshufd \$0x4E, @x[3], @t[3]
- pxor @x[2], @t[2]
- pxor @t[6], @x[0]
- pxor @t[6], @x[1]
- pshufd \$0x4E, @x[4], @t[4]
- pxor @x[3], @t[3]
- pxor @t[0], @x[2]
- pxor @t[1], @x[3]
- pshufd \$0x4E, @x[5], @t[5]
- pxor @x[4], @t[4]
- pxor @t[7], @x[1]
- pxor @t[2], @x[4]
- pxor @x[5], @t[5]
-
- pxor @t[7], @x[2]
- pxor @t[6], @x[3]
- pxor @t[6], @x[4]
- pxor @t[3], @x[5]
- pxor @t[4], @x[6]
- pxor @t[7], @x[4]
- pxor @t[7], @x[5]
- pxor @t[5], @x[7]
-___
- &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6
-}
-
-sub aesenc { # not used
-my @b=@_[0..7];
-my @t=@_[8..15];
-$code.=<<___;
- movdqa 0x30($const),@t[0] # .LSR
-___
- &ShiftRows (@b,@t[0]);
- &Sbox (@b,@t);
- &MixColumns (@b[0,1,4,6,3,7,2,5],@t);
-}
-
-sub aesenclast { # not used
-my @b=@_[0..7];
-my @t=@_[8..15];
-$code.=<<___;
- movdqa 0x40($const),@t[0] # .LSRM0
-___
- &ShiftRows (@b,@t[0]);
- &Sbox (@b,@t);
-$code.=<<___
- pxor 0x00($key),@b[0]
- pxor 0x10($key),@b[1]
- pxor 0x20($key),@b[4]
- pxor 0x30($key),@b[6]
- pxor 0x40($key),@b[3]
- pxor 0x50($key),@b[7]
- pxor 0x60($key),@b[2]
- pxor 0x70($key),@b[5]
-___
-}
-
-sub swapmove {
-my ($a,$b,$n,$mask,$t)=@_;
-$code.=<<___;
- movdqa $b,$t
- psrlq \$$n,$b
- pxor $a,$b
- pand $mask,$b
- pxor $b,$a
- psllq \$$n,$b
- pxor $t,$b
-___
-}
-sub swapmove2x {
-my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_;
-$code.=<<___;
- movdqa $b0,$t0
- psrlq \$$n,$b0
- movdqa $b1,$t1
- psrlq \$$n,$b1
- pxor $a0,$b0
- pxor $a1,$b1
- pand $mask,$b0
- pand $mask,$b1
- pxor $b0,$a0
- psllq \$$n,$b0
- pxor $b1,$a1
- psllq \$$n,$b1
- pxor $t0,$b0
- pxor $t1,$b1
-___
-}
-
-sub bitslice {
-my @x=reverse(@_[0..7]);
-my ($t0,$t1,$t2,$t3)=@_[8..11];
-$code.=<<___;
- movdqa 0x00($const),$t0 # .LBS0
- movdqa 0x10($const),$t1 # .LBS1
-___
- &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3);
- &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
-$code.=<<___;
- movdqa 0x20($const),$t0 # .LBS2
-___
- &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3);
- &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
-
- &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3);
- &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3);
-}
-
-$code.=<<___;
-.text
-
-.type _bsaes_encrypt8,\@abi-omnipotent
-.align 64
-_bsaes_encrypt8:
-.cfi_startproc
- lea .LBS0(%rip), $const # constants table
-
- movdqa ($key), @XMM[9] # round 0 key
- lea 0x10($key), $key
- movdqa 0x50($const), @XMM[8] # .LM0SR
- pxor @XMM[9], @XMM[0] # xor with round0 key
- pxor @XMM[9], @XMM[1]
- pxor @XMM[9], @XMM[2]
- pxor @XMM[9], @XMM[3]
- pshufb @XMM[8], @XMM[0]
- pshufb @XMM[8], @XMM[1]
- pxor @XMM[9], @XMM[4]
- pxor @XMM[9], @XMM[5]
- pshufb @XMM[8], @XMM[2]
- pshufb @XMM[8], @XMM[3]
- pxor @XMM[9], @XMM[6]
- pxor @XMM[9], @XMM[7]
- pshufb @XMM[8], @XMM[4]
- pshufb @XMM[8], @XMM[5]
- pshufb @XMM[8], @XMM[6]
- pshufb @XMM[8], @XMM[7]
-_bsaes_encrypt8_bitslice:
-___
- &bitslice (@XMM[0..7, 8..11]);
-$code.=<<___;
- dec $rounds
- jmp .Lenc_sbox
-.align 16
-.Lenc_loop:
-___
- &ShiftRows (@XMM[0..7, 8]);
-$code.=".Lenc_sbox:\n";
- &Sbox (@XMM[0..7, 8..15]);
-$code.=<<___;
- dec $rounds
- jl .Lenc_done
-___
- &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]);
-$code.=<<___;
- movdqa 0x30($const), @XMM[8] # .LSR
- jnz .Lenc_loop
- movdqa 0x40($const), @XMM[8] # .LSRM0
- jmp .Lenc_loop
-.align 16
-.Lenc_done:
-___
- # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb
- &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]);
-$code.=<<___;
- movdqa ($key), @XMM[8] # last round key
- pxor @XMM[8], @XMM[4]
- pxor @XMM[8], @XMM[6]
- pxor @XMM[8], @XMM[3]
- pxor @XMM[8], @XMM[7]
- pxor @XMM[8], @XMM[2]
- pxor @XMM[8], @XMM[5]
- pxor @XMM[8], @XMM[0]
- pxor @XMM[8], @XMM[1]
- ret
-.cfi_endproc
-.size _bsaes_encrypt8,.-_bsaes_encrypt8
-
-.type _bsaes_decrypt8,\@abi-omnipotent
-.align 64
-_bsaes_decrypt8:
-.cfi_startproc
- lea .LBS0(%rip), $const # constants table
-
- movdqa ($key), @XMM[9] # round 0 key
- lea 0x10($key), $key
- movdqa -0x30($const), @XMM[8] # .LM0ISR
- pxor @XMM[9], @XMM[0] # xor with round0 key
- pxor @XMM[9], @XMM[1]
- pxor @XMM[9], @XMM[2]
- pxor @XMM[9], @XMM[3]
- pshufb @XMM[8], @XMM[0]
- pshufb @XMM[8], @XMM[1]
- pxor @XMM[9], @XMM[4]
- pxor @XMM[9], @XMM[5]
- pshufb @XMM[8], @XMM[2]
- pshufb @XMM[8], @XMM[3]
- pxor @XMM[9], @XMM[6]
- pxor @XMM[9], @XMM[7]
- pshufb @XMM[8], @XMM[4]
- pshufb @XMM[8], @XMM[5]
- pshufb @XMM[8], @XMM[6]
- pshufb @XMM[8], @XMM[7]
-___
- &bitslice (@XMM[0..7, 8..11]);
-$code.=<<___;
- dec $rounds
- jmp .Ldec_sbox
-.align 16
-.Ldec_loop:
-___
- &ShiftRows (@XMM[0..7, 8]);
-$code.=".Ldec_sbox:\n";
- &InvSbox (@XMM[0..7, 8..15]);
-$code.=<<___;
- dec $rounds
- jl .Ldec_done
-___
- &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]);
-$code.=<<___;
- movdqa -0x10($const), @XMM[8] # .LISR
- jnz .Ldec_loop
- movdqa -0x20($const), @XMM[8] # .LISRM0
- jmp .Ldec_loop
-.align 16
-.Ldec_done:
-___
- &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]);
-$code.=<<___;
- movdqa ($key), @XMM[8] # last round key
- pxor @XMM[8], @XMM[6]
- pxor @XMM[8], @XMM[4]
- pxor @XMM[8], @XMM[2]
- pxor @XMM[8], @XMM[7]
- pxor @XMM[8], @XMM[3]
- pxor @XMM[8], @XMM[5]
- pxor @XMM[8], @XMM[0]
- pxor @XMM[8], @XMM[1]
- ret
-.cfi_endproc
-.size _bsaes_decrypt8,.-_bsaes_decrypt8
-___
-}
-{
-my ($out,$inp,$rounds,$const)=("%rax","%rcx","%r10d","%r11");
-
-sub bitslice_key {
-my @x=reverse(@_[0..7]);
-my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12];
-
- &swapmove (@x[0,1],1,$bs0,$t2,$t3);
-$code.=<<___;
- #&swapmove(@x[2,3],1,$t0,$t2,$t3);
- movdqa @x[0], @x[2]
- movdqa @x[1], @x[3]
-___
- #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
-
- &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3);
-$code.=<<___;
- #&swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
- movdqa @x[0], @x[4]
- movdqa @x[2], @x[6]
- movdqa @x[1], @x[5]
- movdqa @x[3], @x[7]
-___
- &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3);
- &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3);
-}
-
-$code.=<<___;
-.type _bsaes_key_convert,\@abi-omnipotent
-.align 16
-_bsaes_key_convert:
-.cfi_startproc
- lea .Lmasks(%rip), $const
- movdqu ($inp), %xmm7 # load round 0 key
- lea 0x10($inp), $inp
- movdqa 0x00($const), %xmm0 # 0x01...
- movdqa 0x10($const), %xmm1 # 0x02...
- movdqa 0x20($const), %xmm2 # 0x04...
- movdqa 0x30($const), %xmm3 # 0x08...
- movdqa 0x40($const), %xmm4 # .LM0
- pcmpeqd %xmm5, %xmm5 # .LNOT
-
- movdqu ($inp), %xmm6 # load round 1 key
- movdqa %xmm7, ($out) # save round 0 key
- lea 0x10($out), $out
- dec $rounds
- jmp .Lkey_loop
-.align 16
-.Lkey_loop:
- pshufb %xmm4, %xmm6 # .LM0
-
- movdqa %xmm0, %xmm8
- movdqa %xmm1, %xmm9
-
- pand %xmm6, %xmm8
- pand %xmm6, %xmm9
- movdqa %xmm2, %xmm10
- pcmpeqb %xmm0, %xmm8
- psllq \$4, %xmm0 # 0x10...
- movdqa %xmm3, %xmm11
- pcmpeqb %xmm1, %xmm9
- psllq \$4, %xmm1 # 0x20...
-
- pand %xmm6, %xmm10
- pand %xmm6, %xmm11
- movdqa %xmm0, %xmm12
- pcmpeqb %xmm2, %xmm10
- psllq \$4, %xmm2 # 0x40...
- movdqa %xmm1, %xmm13
- pcmpeqb %xmm3, %xmm11
- psllq \$4, %xmm3 # 0x80...
-
- movdqa %xmm2, %xmm14
- movdqa %xmm3, %xmm15
- pxor %xmm5, %xmm8 # "pnot"
- pxor %xmm5, %xmm9
-
- pand %xmm6, %xmm12
- pand %xmm6, %xmm13
- movdqa %xmm8, 0x00($out) # write bit-sliced round key
- pcmpeqb %xmm0, %xmm12
- psrlq \$4, %xmm0 # 0x01...
- movdqa %xmm9, 0x10($out)
- pcmpeqb %xmm1, %xmm13
- psrlq \$4, %xmm1 # 0x02...
- lea 0x10($inp), $inp
-
- pand %xmm6, %xmm14
- pand %xmm6, %xmm15
- movdqa %xmm10, 0x20($out)
- pcmpeqb %xmm2, %xmm14
- psrlq \$4, %xmm2 # 0x04...
- movdqa %xmm11, 0x30($out)
- pcmpeqb %xmm3, %xmm15
- psrlq \$4, %xmm3 # 0x08...
- movdqu ($inp), %xmm6 # load next round key
-
- pxor %xmm5, %xmm13 # "pnot"
- pxor %xmm5, %xmm14
- movdqa %xmm12, 0x40($out)
- movdqa %xmm13, 0x50($out)
- movdqa %xmm14, 0x60($out)
- movdqa %xmm15, 0x70($out)
- lea 0x80($out),$out
- dec $rounds
- jnz .Lkey_loop
-
- movdqa 0x50($const), %xmm7 # .L63
- #movdqa %xmm6, ($out) # don't save last round key
- ret
-.cfi_endproc
-.size _bsaes_key_convert,.-_bsaes_key_convert
-___
-}
-
-if (0 && !$win64) { # following four functions are unsupported interface
- # used for benchmarking...
-$code.=<<___;
-.globl bsaes_enc_key_convert
-.type bsaes_enc_key_convert,\@function,2
-.align 16
-bsaes_enc_key_convert:
- mov 240($inp),%r10d # pass rounds
- mov $inp,%rcx # pass key
- mov $out,%rax # pass key schedule
- call _bsaes_key_convert
- pxor %xmm6,%xmm7 # fix up last round key
- movdqa %xmm7,(%rax) # save last round key
- ret
-.size bsaes_enc_key_convert,.-bsaes_enc_key_convert
-
-.globl bsaes_encrypt_128
-.type bsaes_encrypt_128,\@function,4
-.align 16
-bsaes_encrypt_128:
-.Lenc128_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- movdqu 0x60($inp), @XMM[6]
- movdqu 0x70($inp), @XMM[7]
- mov $key, %rax # pass the $key
- lea 0x80($inp), $inp
- mov \$10,%r10d
-
- call _bsaes_encrypt8
-
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[2], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$0x80,$len
- ja .Lenc128_loop
- ret
-.size bsaes_encrypt_128,.-bsaes_encrypt_128
-
-.globl bsaes_dec_key_convert
-.type bsaes_dec_key_convert,\@function,2
-.align 16
-bsaes_dec_key_convert:
- mov 240($inp),%r10d # pass rounds
- mov $inp,%rcx # pass key
- mov $out,%rax # pass key schedule
- call _bsaes_key_convert
- pxor ($out),%xmm7 # fix up round 0 key
- movdqa %xmm6,(%rax) # save last round key
- movdqa %xmm7,($out)
- ret
-.size bsaes_dec_key_convert,.-bsaes_dec_key_convert
-
-.globl bsaes_decrypt_128
-.type bsaes_decrypt_128,\@function,4
-.align 16
-bsaes_decrypt_128:
-.Ldec128_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- movdqu 0x60($inp), @XMM[6]
- movdqu 0x70($inp), @XMM[7]
- mov $key, %rax # pass the $key
- lea 0x80($inp), $inp
- mov \$10,%r10d
-
- call _bsaes_decrypt8
-
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$0x80,$len
- ja .Ldec128_loop
- ret
-.size bsaes_decrypt_128,.-bsaes_decrypt_128
-___
-}
-{
-######################################################################
-#
-# OpenSSL interface
-#
-my ($arg1,$arg2,$arg3,$arg4,$arg5,$arg6)=$win64 ? ("%rcx","%rdx","%r8","%r9","%r10","%r11d")
- : ("%rdi","%rsi","%rdx","%rcx","%r8","%r9d");
-my ($inp,$out,$len,$key)=("%r12","%r13","%r14","%r15");
-
-if ($ecb) {
-$code.=<<___;
-.globl bsaes_ecb_encrypt_blocks
-.type bsaes_ecb_encrypt_blocks,\@abi-omnipotent
-.align 16
-bsaes_ecb_encrypt_blocks:
-.cfi_startproc
- mov %rsp, %rax
-.Lecb_enc_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp),%rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lecb_enc_body:
-___
-$code.=<<___;
- mov %rsp,%rbp # backup %rsp
-.cfi_def_cfa_register %rbp
- mov 240($arg4),%eax # rounds
- mov $arg1,$inp # backup arguments
- mov $arg2,$out
- mov $arg3,$len
- mov $arg4,$key
- cmp \$8,$arg3
- jb .Lecb_enc_short
-
- mov %eax,%ebx # backup rounds
- shl \$7,%rax # 128 bytes per inner round key
- sub \$`128-32`,%rax # size of bit-sliced key schedule
- sub %rax,%rsp
- mov %rsp,%rax # pass key schedule
- mov $key,%rcx # pass key
- mov %ebx,%r10d # pass rounds
- call _bsaes_key_convert
- pxor %xmm6,%xmm7 # fix up last round key
- movdqa %xmm7,(%rax) # save last round key
-
- sub \$8,$len
-.Lecb_enc_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- mov %rsp, %rax # pass key schedule
- movdqu 0x60($inp), @XMM[6]
- mov %ebx,%r10d # pass rounds
- movdqu 0x70($inp), @XMM[7]
- lea 0x80($inp), $inp
-
- call _bsaes_encrypt8
-
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[2], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$8,$len
- jnc .Lecb_enc_loop
-
- add \$8,$len
- jz .Lecb_enc_done
-
- movdqu 0x00($inp), @XMM[0] # load input
- mov %rsp, %rax # pass key schedule
- mov %ebx,%r10d # pass rounds
- cmp \$2,$len
- jb .Lecb_enc_one
- movdqu 0x10($inp), @XMM[1]
- je .Lecb_enc_two
- movdqu 0x20($inp), @XMM[2]
- cmp \$4,$len
- jb .Lecb_enc_three
- movdqu 0x30($inp), @XMM[3]
- je .Lecb_enc_four
- movdqu 0x40($inp), @XMM[4]
- cmp \$6,$len
- jb .Lecb_enc_five
- movdqu 0x50($inp), @XMM[5]
- je .Lecb_enc_six
- movdqu 0x60($inp), @XMM[6]
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[2], 0x60($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_six:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_five:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_four:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_three:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_two:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_one:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_short:
- lea ($inp), $arg1
- lea ($out), $arg2
- lea ($key), $arg3
- call aes_nohw_encrypt
- lea 16($inp), $inp
- lea 16($out), $out
- dec $len
- jnz .Lecb_enc_short
-
-.Lecb_enc_done:
- lea (%rsp),%rax
- pxor %xmm0, %xmm0
-.Lecb_enc_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- jb .Lecb_enc_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lecb_enc_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lecb_enc_epilogue:
- ret
-.cfi_endproc
-.size bsaes_ecb_encrypt_blocks,.-bsaes_ecb_encrypt_blocks
-
-.globl bsaes_ecb_decrypt_blocks
-.type bsaes_ecb_decrypt_blocks,\@abi-omnipotent
-.align 16
-bsaes_ecb_decrypt_blocks:
-.cfi_startproc
- mov %rsp, %rax
-.Lecb_dec_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp),%rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lecb_dec_body:
-___
-$code.=<<___;
- mov %rsp,%rbp # backup %rsp
-.cfi_def_cfa_register %rbp
- mov 240($arg4),%eax # rounds
- mov $arg1,$inp # backup arguments
- mov $arg2,$out
- mov $arg3,$len
- mov $arg4,$key
- cmp \$8,$arg3
- jb .Lecb_dec_short
-
- mov %eax,%ebx # backup rounds
- shl \$7,%rax # 128 bytes per inner round key
- sub \$`128-32`,%rax # size of bit-sliced key schedule
- sub %rax,%rsp
- mov %rsp,%rax # pass key schedule
- mov $key,%rcx # pass key
- mov %ebx,%r10d # pass rounds
- call _bsaes_key_convert
- pxor (%rsp),%xmm7 # fix up 0 round key
- movdqa %xmm6,(%rax) # save last round key
- movdqa %xmm7,(%rsp)
-
- sub \$8,$len
-.Lecb_dec_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- mov %rsp, %rax # pass key schedule
- movdqu 0x60($inp), @XMM[6]
- mov %ebx,%r10d # pass rounds
- movdqu 0x70($inp), @XMM[7]
- lea 0x80($inp), $inp
-
- call _bsaes_decrypt8
-
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$8,$len
- jnc .Lecb_dec_loop
-
- add \$8,$len
- jz .Lecb_dec_done
-
- movdqu 0x00($inp), @XMM[0] # load input
- mov %rsp, %rax # pass key schedule
- mov %ebx,%r10d # pass rounds
- cmp \$2,$len
- jb .Lecb_dec_one
- movdqu 0x10($inp), @XMM[1]
- je .Lecb_dec_two
- movdqu 0x20($inp), @XMM[2]
- cmp \$4,$len
- jb .Lecb_dec_three
- movdqu 0x30($inp), @XMM[3]
- je .Lecb_dec_four
- movdqu 0x40($inp), @XMM[4]
- cmp \$6,$len
- jb .Lecb_dec_five
- movdqu 0x50($inp), @XMM[5]
- je .Lecb_dec_six
- movdqu 0x60($inp), @XMM[6]
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_six:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_five:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_four:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_three:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_two:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_one:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_short:
- lea ($inp), $arg1
- lea ($out), $arg2
- lea ($key), $arg3
- call aes_nohw_decrypt
- lea 16($inp), $inp
- lea 16($out), $out
- dec $len
- jnz .Lecb_dec_short
-
-.Lecb_dec_done:
- lea (%rsp),%rax
- pxor %xmm0, %xmm0
-.Lecb_dec_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- jb .Lecb_dec_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lecb_dec_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lecb_dec_epilogue:
- ret
-.cfi_endproc
-.size bsaes_ecb_decrypt_blocks,.-bsaes_ecb_decrypt_blocks
-___
-}
-$code.=<<___;
-.globl bsaes_cbc_encrypt
-.type bsaes_cbc_encrypt,\@abi-omnipotent
-.align 16
-bsaes_cbc_encrypt:
-.cfi_startproc
- # In OpenSSL, this function had a fallback to aes_nohw_cbc_encrypt for
- # short inputs or if enc is one. We patch this out, using bsaes for all
- # input sizes. The caller is required to ensure enc is zero.
- mov %rsp, %rax
-.Lcbc_dec_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp), %rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- mov 0xa0(%rsp),$arg5 # pull ivp
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lcbc_dec_body:
-___
-$code.=<<___;
- mov %rsp, %rbp # backup %rsp
-.cfi_def_cfa_register %rbp
- mov 240($arg4), %eax # rounds
- mov $arg1, $inp # backup arguments
- mov $arg2, $out
- mov $arg3, $len
- mov $arg4, $key
- mov $arg5, %rbx
- shr \$4, $len # bytes to blocks
-
- mov %eax, %edx # rounds
- shl \$7, %rax # 128 bytes per inner round key
- sub \$`128-32`, %rax # size of bit-sliced key schedule
- sub %rax, %rsp
-
- mov %rsp, %rax # pass key schedule
- mov $key, %rcx # pass key
- mov %edx, %r10d # pass rounds
- call _bsaes_key_convert
- pxor (%rsp),%xmm7 # fix up 0 round key
- movdqa %xmm6,(%rax) # save last round key
- movdqa %xmm7,(%rsp)
-
- movdqu (%rbx), @XMM[15] # load IV
- sub \$8,$len
- jc .Lcbc_dec_loop_done
-
-.Lcbc_dec_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- mov %rsp, %rax # pass key schedule
- movdqu 0x60($inp), @XMM[6]
- mov %edx,%r10d # pass rounds
- movdqu 0x70($inp), @XMM[7]
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
-
- call _bsaes_decrypt8
-
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[9], @XMM[6]
- movdqu 0x30($inp), @XMM[11]
- pxor @XMM[10], @XMM[4]
- movdqu 0x40($inp), @XMM[12]
- pxor @XMM[11], @XMM[2]
- movdqu 0x50($inp), @XMM[13]
- pxor @XMM[12], @XMM[7]
- movdqu 0x60($inp), @XMM[14]
- pxor @XMM[13], @XMM[3]
- movdqu 0x70($inp), @XMM[15] # IV
- pxor @XMM[14], @XMM[5]
- movdqu @XMM[0], 0x00($out) # write output
- lea 0x80($inp), $inp
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$8,$len
- jnc .Lcbc_dec_loop
-
-.Lcbc_dec_loop_done:
- add \$8,$len
- jz .Lcbc_dec_done
-
- movdqu 0x00($inp), @XMM[0] # load input
- mov %rsp, %rax # pass key schedule
- mov %edx, %r10d # pass rounds
- cmp \$2,$len
- jb .Lcbc_dec_one
- movdqu 0x10($inp), @XMM[1]
- je .Lcbc_dec_two
- movdqu 0x20($inp), @XMM[2]
- cmp \$4,$len
- jb .Lcbc_dec_three
- movdqu 0x30($inp), @XMM[3]
- je .Lcbc_dec_four
- movdqu 0x40($inp), @XMM[4]
- cmp \$6,$len
- jb .Lcbc_dec_five
- movdqu 0x50($inp), @XMM[5]
- je .Lcbc_dec_six
- movdqu 0x60($inp), @XMM[6]
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[9], @XMM[6]
- movdqu 0x30($inp), @XMM[11]
- pxor @XMM[10], @XMM[4]
- movdqu 0x40($inp), @XMM[12]
- pxor @XMM[11], @XMM[2]
- movdqu 0x50($inp), @XMM[13]
- pxor @XMM[12], @XMM[7]
- movdqu 0x60($inp), @XMM[15] # IV
- pxor @XMM[13], @XMM[3]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_six:
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[9], @XMM[6]
- movdqu 0x30($inp), @XMM[11]
- pxor @XMM[10], @XMM[4]
- movdqu 0x40($inp), @XMM[12]
- pxor @XMM[11], @XMM[2]
- movdqu 0x50($inp), @XMM[15] # IV
- pxor @XMM[12], @XMM[7]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_five:
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[9], @XMM[6]
- movdqu 0x30($inp), @XMM[11]
- pxor @XMM[10], @XMM[4]
- movdqu 0x40($inp), @XMM[15] # IV
- pxor @XMM[11], @XMM[2]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_four:
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[9], @XMM[6]
- movdqu 0x30($inp), @XMM[15] # IV
- pxor @XMM[10], @XMM[4]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_three:
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[15] # IV
- pxor @XMM[9], @XMM[6]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_two:
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[15] # IV
- pxor @XMM[8], @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_one:
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[15] # IV
- movdqu @XMM[0], 0x00($out) # write output
- jmp .Lcbc_dec_done
-
-.Lcbc_dec_done:
- movdqu @XMM[15], (%rbx) # return IV
- lea (%rsp), %rax
- pxor %xmm0, %xmm0
-.Lcbc_dec_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- ja .Lcbc_dec_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lcbc_dec_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lcbc_dec_epilogue:
- ret
-.cfi_endproc
-.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
-
-.globl bsaes_ctr32_encrypt_blocks
-.type bsaes_ctr32_encrypt_blocks,\@abi-omnipotent
-.align 16
-bsaes_ctr32_encrypt_blocks:
-.cfi_startproc
-#ifndef NDEBUG
-#ifndef BORINGSSL_FIPS
-.extern BORINGSSL_function_hit
- movb \$1, BORINGSSL_function_hit+6(%rip)
-#endif
-#endif
- mov %rsp, %rax
-.Lctr_enc_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp), %rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- mov 0xa0(%rsp),$arg5 # pull ivp
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lctr_enc_body:
-___
-$code.=<<___;
- mov %rsp, %rbp # backup %rsp
-.cfi_def_cfa_register %rbp
- movdqu ($arg5), %xmm0 # load counter
- mov 240($arg4), %eax # rounds
- mov $arg1, $inp # backup arguments
- mov $arg2, $out
- mov $arg3, $len
- mov $arg4, $key
- movdqa %xmm0, 0x20(%rbp) # copy counter
- # In OpenSSL, short inputs fall back to aes_nohw_* here. We patch this
- # out to retain a constant-time implementation.
-
- mov %eax, %ebx # rounds
- shl \$7, %rax # 128 bytes per inner round key
- sub \$`128-32`, %rax # size of bit-sliced key schedule
- sub %rax, %rsp
-
- mov %rsp, %rax # pass key schedule
- mov $key, %rcx # pass key
- mov %ebx, %r10d # pass rounds
- call _bsaes_key_convert
- pxor %xmm6,%xmm7 # fix up last round key
- movdqa %xmm7,(%rax) # save last round key
-
- movdqa (%rsp), @XMM[9] # load round0 key
- lea .LADD1(%rip), %r11
- movdqa 0x20(%rbp), @XMM[0] # counter copy
- movdqa -0x20(%r11), @XMM[8] # .LSWPUP
- pshufb @XMM[8], @XMM[9] # byte swap upper part
- pshufb @XMM[8], @XMM[0]
- movdqa @XMM[9], (%rsp) # save adjusted round0 key
- jmp .Lctr_enc_loop
-.align 16
-.Lctr_enc_loop:
- movdqa @XMM[0], 0x20(%rbp) # save counter
- movdqa @XMM[0], @XMM[1] # prepare 8 counter values
- movdqa @XMM[0], @XMM[2]
- paddd 0x00(%r11), @XMM[1] # .LADD1
- movdqa @XMM[0], @XMM[3]
- paddd 0x10(%r11), @XMM[2] # .LADD2
- movdqa @XMM[0], @XMM[4]
- paddd 0x20(%r11), @XMM[3] # .LADD3
- movdqa @XMM[0], @XMM[5]
- paddd 0x30(%r11), @XMM[4] # .LADD4
- movdqa @XMM[0], @XMM[6]
- paddd 0x40(%r11), @XMM[5] # .LADD5
- movdqa @XMM[0], @XMM[7]
- paddd 0x50(%r11), @XMM[6] # .LADD6
- paddd 0x60(%r11), @XMM[7] # .LADD7
-
- # Borrow prologue from _bsaes_encrypt8 to use the opportunity
- # to flip byte order in 32-bit counter
- movdqa (%rsp), @XMM[9] # round 0 key
- lea 0x10(%rsp), %rax # pass key schedule
- movdqa -0x10(%r11), @XMM[8] # .LSWPUPM0SR
- pxor @XMM[9], @XMM[0] # xor with round0 key
- pxor @XMM[9], @XMM[1]
- pxor @XMM[9], @XMM[2]
- pxor @XMM[9], @XMM[3]
- pshufb @XMM[8], @XMM[0]
- pshufb @XMM[8], @XMM[1]
- pxor @XMM[9], @XMM[4]
- pxor @XMM[9], @XMM[5]
- pshufb @XMM[8], @XMM[2]
- pshufb @XMM[8], @XMM[3]
- pxor @XMM[9], @XMM[6]
- pxor @XMM[9], @XMM[7]
- pshufb @XMM[8], @XMM[4]
- pshufb @XMM[8], @XMM[5]
- pshufb @XMM[8], @XMM[6]
- pshufb @XMM[8], @XMM[7]
- lea .LBS0(%rip), %r11 # constants table
- mov %ebx,%r10d # pass rounds
-
- call _bsaes_encrypt8_bitslice
-
- sub \$8,$len
- jc .Lctr_enc_loop_done
-
- movdqu 0x00($inp), @XMM[8] # load input
- movdqu 0x10($inp), @XMM[9]
- movdqu 0x20($inp), @XMM[10]
- movdqu 0x30($inp), @XMM[11]
- movdqu 0x40($inp), @XMM[12]
- movdqu 0x50($inp), @XMM[13]
- movdqu 0x60($inp), @XMM[14]
- movdqu 0x70($inp), @XMM[15]
- lea 0x80($inp),$inp
- pxor @XMM[0], @XMM[8]
- movdqa 0x20(%rbp), @XMM[0] # load counter
- pxor @XMM[9], @XMM[1]
- movdqu @XMM[8], 0x00($out) # write output
- pxor @XMM[10], @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor @XMM[11], @XMM[6]
- movdqu @XMM[4], 0x20($out)
- pxor @XMM[12], @XMM[3]
- movdqu @XMM[6], 0x30($out)
- pxor @XMM[13], @XMM[7]
- movdqu @XMM[3], 0x40($out)
- pxor @XMM[14], @XMM[2]
- movdqu @XMM[7], 0x50($out)
- pxor @XMM[15], @XMM[5]
- movdqu @XMM[2], 0x60($out)
- lea .LADD1(%rip), %r11
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- paddd 0x70(%r11), @XMM[0] # .LADD8
- jnz .Lctr_enc_loop
-
- jmp .Lctr_enc_done
-.align 16
-.Lctr_enc_loop_done:
- add \$8, $len
- movdqu 0x00($inp), @XMM[8] # load input
- pxor @XMM[8], @XMM[0]
- movdqu @XMM[0], 0x00($out) # write output
- cmp \$2,$len
- jb .Lctr_enc_done
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[9], @XMM[1]
- movdqu @XMM[1], 0x10($out)
- je .Lctr_enc_done
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[10], @XMM[4]
- movdqu @XMM[4], 0x20($out)
- cmp \$4,$len
- jb .Lctr_enc_done
- movdqu 0x30($inp), @XMM[11]
- pxor @XMM[11], @XMM[6]
- movdqu @XMM[6], 0x30($out)
- je .Lctr_enc_done
- movdqu 0x40($inp), @XMM[12]
- pxor @XMM[12], @XMM[3]
- movdqu @XMM[3], 0x40($out)
- cmp \$6,$len
- jb .Lctr_enc_done
- movdqu 0x50($inp), @XMM[13]
- pxor @XMM[13], @XMM[7]
- movdqu @XMM[7], 0x50($out)
- je .Lctr_enc_done
- movdqu 0x60($inp), @XMM[14]
- pxor @XMM[14], @XMM[2]
- movdqu @XMM[2], 0x60($out)
-
- # OpenSSL contains aes_nohw_* fallback code here. We patch this
- # out to retain a constant-time implementation.
-.Lctr_enc_done:
- lea (%rsp), %rax
- pxor %xmm0, %xmm0
-.Lctr_enc_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- ja .Lctr_enc_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lctr_enc_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lctr_enc_epilogue:
- ret
-.cfi_endproc
-.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
-___
-######################################################################
-# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len,
-# const AES_KEY *key1, const AES_KEY *key2,
-# const unsigned char iv[16]);
-#
-# We patch out the XTS implementation in BoringSSL.
-if ($xts) {
-my ($twmask,$twres,$twtmp)=@XMM[13..15];
-$arg6=~s/d$//;
-
-$code.=<<___;
-.globl bsaes_xts_encrypt
-.type bsaes_xts_encrypt,\@abi-omnipotent
-.align 16
-bsaes_xts_encrypt:
-.cfi_startproc
- mov %rsp, %rax
-.Lxts_enc_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp), %rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- mov 0xa0(%rsp),$arg5 # pull key2
- mov 0xa8(%rsp),$arg6 # pull ivp
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lxts_enc_body:
-___
-$code.=<<___;
- mov %rsp, %rbp # backup %rsp
-.cfi_def_cfa_register %rbp
- mov $arg1, $inp # backup arguments
- mov $arg2, $out
- mov $arg3, $len
- mov $arg4, $key
-
- lea ($arg6), $arg1
- lea 0x20(%rbp), $arg2
- lea ($arg5), $arg3
- call aes_nohw_encrypt # generate initial tweak
-
- mov 240($key), %eax # rounds
- mov $len, %rbx # backup $len
-
- mov %eax, %edx # rounds
- shl \$7, %rax # 128 bytes per inner round key
- sub \$`128-32`, %rax # size of bit-sliced key schedule
- sub %rax, %rsp
-
- mov %rsp, %rax # pass key schedule
- mov $key, %rcx # pass key
- mov %edx, %r10d # pass rounds
- call _bsaes_key_convert
- pxor %xmm6, %xmm7 # fix up last round key
- movdqa %xmm7, (%rax) # save last round key
-
- and \$-16, $len
- sub \$0x80, %rsp # place for tweak[8]
- movdqa 0x20(%rbp), @XMM[7] # initial tweak
-
- pxor $twtmp, $twtmp
- movdqa .Lxts_magic(%rip), $twmask
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
-
- sub \$0x80, $len
- jc .Lxts_enc_short
- jmp .Lxts_enc_loop
-
-.align 16
-.Lxts_enc_loop:
-___
- for ($i=0;$i<7;$i++) {
- $code.=<<___;
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- movdqa @XMM[7], @XMM[$i]
- movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-___
- $code.=<<___ if ($i>=1);
- movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
-___
- $code.=<<___ if ($i>=2);
- pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
-___
- }
-$code.=<<___;
- movdqu 0x60($inp), @XMM[8+6]
- pxor @XMM[8+5], @XMM[5]
- movdqu 0x70($inp), @XMM[8+7]
- lea 0x80($inp), $inp
- movdqa @XMM[7], 0x70(%rsp)
- pxor @XMM[8+6], @XMM[6]
- lea 0x80(%rsp), %rax # pass key schedule
- pxor @XMM[8+7], @XMM[7]
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[6]
- movdqu @XMM[4], 0x20($out)
- pxor 0x40(%rsp), @XMM[3]
- movdqu @XMM[6], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[3], 0x40($out)
- pxor 0x60(%rsp), @XMM[2]
- movdqu @XMM[7], 0x50($out)
- pxor 0x70(%rsp), @XMM[5]
- movdqu @XMM[2], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
-
- movdqa 0x70(%rsp), @XMM[7] # prepare next iteration tweak
- pxor $twtmp, $twtmp
- movdqa .Lxts_magic(%rip), $twmask
- pcmpgtd @XMM[7], $twtmp
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-
- sub \$0x80,$len
- jnc .Lxts_enc_loop
-
-.Lxts_enc_short:
- add \$0x80, $len
- jz .Lxts_enc_done
-___
- for ($i=0;$i<7;$i++) {
- $code.=<<___;
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- movdqa @XMM[7], @XMM[$i]
- movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-___
- $code.=<<___ if ($i>=1);
- movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
- cmp \$`0x10*$i`,$len
- je .Lxts_enc_$i
-___
- $code.=<<___ if ($i>=2);
- pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
-___
- }
-$code.=<<___;
- movdqu 0x60($inp), @XMM[8+6]
- pxor @XMM[8+5], @XMM[5]
- movdqa @XMM[7], 0x70(%rsp)
- lea 0x70($inp), $inp
- pxor @XMM[8+6], @XMM[6]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[6]
- movdqu @XMM[4], 0x20($out)
- pxor 0x40(%rsp), @XMM[3]
- movdqu @XMM[6], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[3], 0x40($out)
- pxor 0x60(%rsp), @XMM[2]
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[2], 0x60($out)
- lea 0x70($out), $out
-
- movdqa 0x70(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_6:
- pxor @XMM[8+4], @XMM[4]
- lea 0x60($inp), $inp
- pxor @XMM[8+5], @XMM[5]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[6]
- movdqu @XMM[4], 0x20($out)
- pxor 0x40(%rsp), @XMM[3]
- movdqu @XMM[6], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[3], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- lea 0x60($out), $out
-
- movdqa 0x60(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_5:
- pxor @XMM[8+3], @XMM[3]
- lea 0x50($inp), $inp
- pxor @XMM[8+4], @XMM[4]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[6]
- movdqu @XMM[4], 0x20($out)
- pxor 0x40(%rsp), @XMM[3]
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- lea 0x50($out), $out
-
- movdqa 0x50(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_4:
- pxor @XMM[8+2], @XMM[2]
- lea 0x40($inp), $inp
- pxor @XMM[8+3], @XMM[3]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[6]
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- lea 0x40($out), $out
-
- movdqa 0x40(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_3:
- pxor @XMM[8+1], @XMM[1]
- lea 0x30($inp), $inp
- pxor @XMM[8+2], @XMM[2]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- lea 0x30($out), $out
-
- movdqa 0x30(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_2:
- pxor @XMM[8+0], @XMM[0]
- lea 0x20($inp), $inp
- pxor @XMM[8+1], @XMM[1]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- lea 0x20($out), $out
-
- movdqa 0x20(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_1:
- pxor @XMM[0], @XMM[8]
- lea 0x10($inp), $inp
- movdqa @XMM[8], 0x20(%rbp)
- lea 0x20(%rbp), $arg1
- lea 0x20(%rbp), $arg2
- lea ($key), $arg3
- call aes_nohw_encrypt # doesn't touch %xmm
- pxor 0x20(%rbp), @XMM[0] # ^= tweak[]
- #pxor @XMM[8], @XMM[0]
- #lea 0x80(%rsp), %rax # pass key schedule
- #mov %edx, %r10d # pass rounds
- #call _bsaes_encrypt8
- #pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- movdqu @XMM[0], 0x00($out) # write output
- lea 0x10($out), $out
-
- movdqa 0x10(%rsp), @XMM[7] # next iteration tweak
-
-.Lxts_enc_done:
- and \$15, %ebx
- jz .Lxts_enc_ret
- mov $out, %rdx
-
-.Lxts_enc_steal:
- movzb ($inp), %eax
- movzb -16(%rdx), %ecx
- lea 1($inp), $inp
- mov %al, -16(%rdx)
- mov %cl, 0(%rdx)
- lea 1(%rdx), %rdx
- sub \$1,%ebx
- jnz .Lxts_enc_steal
-
- movdqu -16($out), @XMM[0]
- lea 0x20(%rbp), $arg1
- pxor @XMM[7], @XMM[0]
- lea 0x20(%rbp), $arg2
- movdqa @XMM[0], 0x20(%rbp)
- lea ($key), $arg3
- call aes_nohw_encrypt # doesn't touch %xmm
- pxor 0x20(%rbp), @XMM[7]
- movdqu @XMM[7], -16($out)
-
-.Lxts_enc_ret:
- lea (%rsp), %rax
- pxor %xmm0, %xmm0
-.Lxts_enc_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- ja .Lxts_enc_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lxts_enc_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lxts_enc_epilogue:
- ret
-.cfi_endproc
-.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
-
-.globl bsaes_xts_decrypt
-.type bsaes_xts_decrypt,\@abi-omnipotent
-.align 16
-bsaes_xts_decrypt:
-.cfi_startproc
- mov %rsp, %rax
-.Lxts_dec_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp), %rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- mov 0xa0(%rsp),$arg5 # pull key2
- mov 0xa8(%rsp),$arg6 # pull ivp
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lxts_dec_body:
-___
-$code.=<<___;
- mov %rsp, %rbp # backup %rsp
- mov $arg1, $inp # backup arguments
- mov $arg2, $out
- mov $arg3, $len
- mov $arg4, $key
-
- lea ($arg6), $arg1
- lea 0x20(%rbp), $arg2
- lea ($arg5), $arg3
- call aes_nohw_encrypt # generate initial tweak
-
- mov 240($key), %eax # rounds
- mov $len, %rbx # backup $len
-
- mov %eax, %edx # rounds
- shl \$7, %rax # 128 bytes per inner round key
- sub \$`128-32`, %rax # size of bit-sliced key schedule
- sub %rax, %rsp
-
- mov %rsp, %rax # pass key schedule
- mov $key, %rcx # pass key
- mov %edx, %r10d # pass rounds
- call _bsaes_key_convert
- pxor (%rsp), %xmm7 # fix up round 0 key
- movdqa %xmm6, (%rax) # save last round key
- movdqa %xmm7, (%rsp)
-
- xor %eax, %eax # if ($len%16) len-=16;
- and \$-16, $len
- test \$15, %ebx
- setnz %al
- shl \$4, %rax
- sub %rax, $len
-
- sub \$0x80, %rsp # place for tweak[8]
- movdqa 0x20(%rbp), @XMM[7] # initial tweak
-
- pxor $twtmp, $twtmp
- movdqa .Lxts_magic(%rip), $twmask
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
-
- sub \$0x80, $len
- jc .Lxts_dec_short
- jmp .Lxts_dec_loop
-
-.align 16
-.Lxts_dec_loop:
-___
- for ($i=0;$i<7;$i++) {
- $code.=<<___;
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- movdqa @XMM[7], @XMM[$i]
- movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-___
- $code.=<<___ if ($i>=1);
- movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
-___
- $code.=<<___ if ($i>=2);
- pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
-___
- }
-$code.=<<___;
- movdqu 0x60($inp), @XMM[8+6]
- pxor @XMM[8+5], @XMM[5]
- movdqu 0x70($inp), @XMM[8+7]
- lea 0x80($inp), $inp
- movdqa @XMM[7], 0x70(%rsp)
- pxor @XMM[8+6], @XMM[6]
- lea 0x80(%rsp), %rax # pass key schedule
- pxor @XMM[8+7], @XMM[7]
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[4]
- movdqu @XMM[6], 0x20($out)
- pxor 0x40(%rsp), @XMM[2]
- movdqu @XMM[4], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[2], 0x40($out)
- pxor 0x60(%rsp), @XMM[3]
- movdqu @XMM[7], 0x50($out)
- pxor 0x70(%rsp), @XMM[5]
- movdqu @XMM[3], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
-
- movdqa 0x70(%rsp), @XMM[7] # prepare next iteration tweak
- pxor $twtmp, $twtmp
- movdqa .Lxts_magic(%rip), $twmask
- pcmpgtd @XMM[7], $twtmp
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-
- sub \$0x80,$len
- jnc .Lxts_dec_loop
-
-.Lxts_dec_short:
- add \$0x80, $len
- jz .Lxts_dec_done
-___
- for ($i=0;$i<7;$i++) {
- $code.=<<___;
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- movdqa @XMM[7], @XMM[$i]
- movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-___
- $code.=<<___ if ($i>=1);
- movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
- cmp \$`0x10*$i`,$len
- je .Lxts_dec_$i
-___
- $code.=<<___ if ($i>=2);
- pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
-___
- }
-$code.=<<___;
- movdqu 0x60($inp), @XMM[8+6]
- pxor @XMM[8+5], @XMM[5]
- movdqa @XMM[7], 0x70(%rsp)
- lea 0x70($inp), $inp
- pxor @XMM[8+6], @XMM[6]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[4]
- movdqu @XMM[6], 0x20($out)
- pxor 0x40(%rsp), @XMM[2]
- movdqu @XMM[4], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[2], 0x40($out)
- pxor 0x60(%rsp), @XMM[3]
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- lea 0x70($out), $out
-
- movdqa 0x70(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_6:
- pxor @XMM[8+4], @XMM[4]
- lea 0x60($inp), $inp
- pxor @XMM[8+5], @XMM[5]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[4]
- movdqu @XMM[6], 0x20($out)
- pxor 0x40(%rsp), @XMM[2]
- movdqu @XMM[4], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- lea 0x60($out), $out
-
- movdqa 0x60(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_5:
- pxor @XMM[8+3], @XMM[3]
- lea 0x50($inp), $inp
- pxor @XMM[8+4], @XMM[4]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[4]
- movdqu @XMM[6], 0x20($out)
- pxor 0x40(%rsp), @XMM[2]
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- lea 0x50($out), $out
-
- movdqa 0x50(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_4:
- pxor @XMM[8+2], @XMM[2]
- lea 0x40($inp), $inp
- pxor @XMM[8+3], @XMM[3]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[4]
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- lea 0x40($out), $out
-
- movdqa 0x40(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_3:
- pxor @XMM[8+1], @XMM[1]
- lea 0x30($inp), $inp
- pxor @XMM[8+2], @XMM[2]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- lea 0x30($out), $out
-
- movdqa 0x30(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_2:
- pxor @XMM[8+0], @XMM[0]
- lea 0x20($inp), $inp
- pxor @XMM[8+1], @XMM[1]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- lea 0x20($out), $out
-
- movdqa 0x20(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_1:
- pxor @XMM[0], @XMM[8]
- lea 0x10($inp), $inp
- movdqa @XMM[8], 0x20(%rbp)
- lea 0x20(%rbp), $arg1
- lea 0x20(%rbp), $arg2
- lea ($key), $arg3
- call aes_nohw_decrypt # doesn't touch %xmm
- pxor 0x20(%rbp), @XMM[0] # ^= tweak[]
- #pxor @XMM[8], @XMM[0]
- #lea 0x80(%rsp), %rax # pass key schedule
- #mov %edx, %r10d # pass rounds
- #call _bsaes_decrypt8
- #pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- movdqu @XMM[0], 0x00($out) # write output
- lea 0x10($out), $out
-
- movdqa 0x10(%rsp), @XMM[7] # next iteration tweak
-
-.Lxts_dec_done:
- and \$15, %ebx
- jz .Lxts_dec_ret
-
- pxor $twtmp, $twtmp
- movdqa .Lxts_magic(%rip), $twmask
- pcmpgtd @XMM[7], $twtmp
- pshufd \$0x13, $twtmp, $twres
- movdqa @XMM[7], @XMM[6]
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- movdqu ($inp), @XMM[0]
- pxor $twres, @XMM[7]
-
- lea 0x20(%rbp), $arg1
- pxor @XMM[7], @XMM[0]
- lea 0x20(%rbp), $arg2
- movdqa @XMM[0], 0x20(%rbp)
- lea ($key), $arg3
- call aes_nohw_decrypt # doesn't touch %xmm
- pxor 0x20(%rbp), @XMM[7]
- mov $out, %rdx
- movdqu @XMM[7], ($out)
-
-.Lxts_dec_steal:
- movzb 16($inp), %eax
- movzb (%rdx), %ecx
- lea 1($inp), $inp
- mov %al, (%rdx)
- mov %cl, 16(%rdx)
- lea 1(%rdx), %rdx
- sub \$1,%ebx
- jnz .Lxts_dec_steal
-
- movdqu ($out), @XMM[0]
- lea 0x20(%rbp), $arg1
- pxor @XMM[6], @XMM[0]
- lea 0x20(%rbp), $arg2
- movdqa @XMM[0], 0x20(%rbp)
- lea ($key), $arg3
- call aes_nohw_decrypt # doesn't touch %xmm
- pxor 0x20(%rbp), @XMM[6]
- movdqu @XMM[6], ($out)
-
-.Lxts_dec_ret:
- lea (%rsp), %rax
- pxor %xmm0, %xmm0
-.Lxts_dec_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- ja .Lxts_dec_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lxts_dec_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lxts_dec_epilogue:
- ret
-.cfi_endproc
-.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
-___
-}
-} # $xts
-$code.=<<___;
-.type _bsaes_const,\@object
-.align 64
-_bsaes_const:
-.LM0ISR: # InvShiftRows constants
- .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
-.LISRM0:
- .quad 0x01040b0e0205080f, 0x0306090c00070a0d
-.LISR:
- .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
-.LBS0: # bit-slice constants
- .quad 0x5555555555555555, 0x5555555555555555
-.LBS1:
- .quad 0x3333333333333333, 0x3333333333333333
-.LBS2:
- .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
-.LSR: # shiftrows constants
- .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
-.LSRM0:
- .quad 0x0304090e00050a0f, 0x01060b0c0207080d
-.LM0SR:
- .quad 0x0a0e02060f03070b, 0x0004080c05090d01
-.LSWPUP: # byte-swap upper dword
- .quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
-.LSWPUPM0SR:
- .quad 0x0a0d02060c03070b, 0x0004080f05090e01
-.LADD1: # counter increment constants
- .quad 0x0000000000000000, 0x0000000100000000
-.LADD2:
- .quad 0x0000000000000000, 0x0000000200000000
-.LADD3:
- .quad 0x0000000000000000, 0x0000000300000000
-.LADD4:
- .quad 0x0000000000000000, 0x0000000400000000
-.LADD5:
- .quad 0x0000000000000000, 0x0000000500000000
-.LADD6:
- .quad 0x0000000000000000, 0x0000000600000000
-.LADD7:
- .quad 0x0000000000000000, 0x0000000700000000
-.LADD8:
- .quad 0x0000000000000000, 0x0000000800000000
-.Lxts_magic:
- .long 0x87,0,1,0
-.Lmasks:
- .quad 0x0101010101010101, 0x0101010101010101
- .quad 0x0202020202020202, 0x0202020202020202
- .quad 0x0404040404040404, 0x0404040404040404
- .quad 0x0808080808080808, 0x0808080808080808
-.LM0:
- .quad 0x02060a0e03070b0f, 0x0004080c0105090d
-.L63:
- .quad 0x6363636363636363, 0x6363636363636363
-.asciz "Bit-sliced AES for x86_64/SSSE3, Emilia Käsper, Peter Schwabe, Andy Polyakov"
-.align 64
-.size _bsaes_const,.-_bsaes_const
-___
-
-# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
-# CONTEXT *context,DISPATCHER_CONTEXT *disp)
-if ($win64) {
-$rec="%rcx";
-$frame="%rdx";
-$context="%r8";
-$disp="%r9";
-
-$code.=<<___;
-.extern __imp_RtlVirtualUnwind
-.type se_handler,\@abi-omnipotent
-.align 16
-se_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 120($context),%rax # pull context->Rax
- mov 248($context),%rbx # pull context->Rip
-
- mov 8($disp),%rsi # disp->ImageBase
- mov 56($disp),%r11 # disp->HandlerData
-
- mov 0(%r11),%r10d # HandlerData[0]
- lea (%rsi,%r10),%r10 # prologue label
- cmp %r10,%rbx # context->Rip<=prologue label
- jbe .Lin_prologue
-
- mov 4(%r11),%r10d # HandlerData[1]
- lea (%rsi,%r10),%r10 # epilogue label
- cmp %r10,%rbx # context->Rip>=epilogue label
- jae .Lin_prologue
-
- mov 8(%r11),%r10d # HandlerData[2]
- lea (%rsi,%r10),%r10 # epilogue label
- cmp %r10,%rbx # context->Rip>=tail label
- jae .Lin_tail
-
- mov 160($context),%rax # pull context->Rbp
-
- lea 0x40(%rax),%rsi # %xmm save area
- lea 512($context),%rdi # &context.Xmm6
- mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
- .long 0xa548f3fc # cld; rep movsq
- lea 0xa0+0x78(%rax),%rax # adjust stack pointer
-
-.Lin_tail:
- mov -48(%rax),%rbp
- mov -40(%rax),%rbx
- mov -32(%rax),%r12
- mov -24(%rax),%r13
- mov -16(%rax),%r14
- mov -8(%rax),%r15
- mov %rbx,144($context) # restore context->Rbx
- mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore context->R12
- mov %r13,224($context) # restore context->R13
- mov %r14,232($context) # restore context->R14
- mov %r15,240($context) # restore context->R15
-
-.Lin_prologue:
- mov %rax,152($context) # restore context->Rsp
-
- mov 40($disp),%rdi # disp->ContextRecord
- mov $context,%rsi # context
- mov \$`1232/8`,%ecx # sizeof(CONTEXT)
- .long 0xa548f3fc # cld; rep movsq
-
- mov $disp,%rsi
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
- mov 8(%rsi),%rdx # arg2, disp->ImageBase
- mov 0(%rsi),%r8 # arg3, disp->ControlPc
- mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
- mov 40(%rsi),%r10 # disp->ContextRecord
- lea 56(%rsi),%r11 # &disp->HandlerData
- lea 24(%rsi),%r12 # &disp->EstablisherFrame
- mov %r10,32(%rsp) # arg5
- mov %r11,40(%rsp) # arg6
- mov %r12,48(%rsp) # arg7
- mov %rcx,56(%rsp) # arg8, (NULL)
- call *__imp_RtlVirtualUnwind(%rip)
-
- mov \$1,%eax # ExceptionContinueSearch
- add \$64,%rsp
- popfq
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbp
- pop %rbx
- pop %rdi
- pop %rsi
- ret
-.size se_handler,.-se_handler
-
-.section .pdata
-.align 4
-___
-$code.=<<___ if ($ecb);
- .rva .Lecb_enc_prologue
- .rva .Lecb_enc_epilogue
- .rva .Lecb_enc_info
-
- .rva .Lecb_dec_prologue
- .rva .Lecb_dec_epilogue
- .rva .Lecb_dec_info
-___
-$code.=<<___;
- .rva .Lcbc_dec_prologue
- .rva .Lcbc_dec_epilogue
- .rva .Lcbc_dec_info
-
- .rva .Lctr_enc_prologue
- .rva .Lctr_enc_epilogue
- .rva .Lctr_enc_info
-___
-$code.=<<___ if ($xts);
- .rva .Lxts_enc_prologue
- .rva .Lxts_enc_epilogue
- .rva .Lxts_enc_info
-
- .rva .Lxts_dec_prologue
- .rva .Lxts_dec_epilogue
- .rva .Lxts_dec_info
-___
-$code.=<<___;
-
-.section .xdata
-.align 8
-___
-$code.=<<___ if ($ecb);
-.Lecb_enc_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lecb_enc_body,.Lecb_enc_epilogue # HandlerData[]
- .rva .Lecb_enc_tail
- .long 0
-.Lecb_dec_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lecb_dec_body,.Lecb_dec_epilogue # HandlerData[]
- .rva .Lecb_dec_tail
- .long 0
-___
-$code.=<<___;
-.Lcbc_dec_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lcbc_dec_body,.Lcbc_dec_epilogue # HandlerData[]
- .rva .Lcbc_dec_tail
- .long 0
-.Lctr_enc_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lctr_enc_body,.Lctr_enc_epilogue # HandlerData[]
- .rva .Lctr_enc_tail
- .long 0
-___
-$code.=<<___ if ($xts);
-.Lxts_enc_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lxts_enc_body,.Lxts_enc_epilogue # HandlerData[]
- .rva .Lxts_enc_tail
- .long 0
-.Lxts_dec_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lxts_dec_body,.Lxts_dec_epilogue # HandlerData[]
- .rva .Lxts_dec_tail
- .long 0
-___
-}
-
-$code =~ s/\`([^\`]*)\`/eval($1)/gem;
-
-print $code;
-
-close STDOUT;
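
The file removed above implemented bit-sliced AES (bsaes) for ECB, CBC decryption, CTR and XTS, with the OpenSSL fallbacks to aes_nohw_* patched out to keep the code constant-time. As a reference for what the deleted bsaes_cbc_encrypt computed, here is a minimal one-block-at-a-time C model; the eight-block bit-sliced batching and the key-schedule conversion are elided, and aes_decrypt_block is a placeholder rather than a real symbol in this tree.

#include <stdint.h>
#include <string.h>

typedef void (*block_fn)(const uint8_t in[16], uint8_t out[16], const void *key);

// P[i] = Decrypt(C[i]) ^ C[i-1], with C[-1] = IV; the last ciphertext block
// is written back so a later call can continue the chain, matching the
// "return IV" store at .Lcbc_dec_done above.
static void cbc_decrypt_model(const uint8_t *in, uint8_t *out, size_t len,
                              const void *key, uint8_t iv[16],
                              block_fn aes_decrypt_block) {
  uint8_t prev[16], cur[16], tmp[16];
  memcpy(prev, iv, 16);
  for (size_t off = 0; off + 16 <= len; off += 16) {
    memcpy(cur, in + off, 16);           // keep ciphertext; it chains forward
    aes_decrypt_block(cur, tmp, key);
    for (int j = 0; j < 16; j++) {
      out[off + j] = tmp[j] ^ prev[j];   // XOR with previous ciphertext or IV
    }
    memcpy(prev, cur, 16);
  }
  memcpy(iv, prev, 16);                  // updated IV for a subsequent call
}
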
diff --git a/src/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl b/src/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
index 47d9972f..9429344b 100644
--- a/src/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
+++ b/src/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
@@ -176,6 +176,181 @@ _vpaes_encrypt_core:
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
##
+## _aes_encrypt_core_2x
+##
+## AES-encrypt %xmm0 and %xmm6 in parallel.
+##
+## Inputs:
+## %xmm0 and %xmm6 = input
+## %xmm12-%xmm15 as in _vpaes_preheat
+## (%rdx) = scheduled keys
+##
+## Output in %xmm0 and %xmm6
+## Clobbers %xmm1-%xmm5, %xmm7-%xmm11, %r9, %r10, %r11, %rax
+## Preserves %xmm14 and %xmm15
+##
+## This function stitches two parallel instances of _vpaes_encrypt_core. x86_64
+## provides 16 XMM registers. _vpaes_encrypt_core computes over six registers
+## (%xmm0-%xmm5) and additionally uses seven registers with preloaded constants
+## from _vpaes_preheat (%xmm9-%xmm15). This does not quite fit two instances,
+## so we spill some of %xmm9 through %xmm15 back to memory. We keep %xmm9 and
+## %xmm10 in registers as these values are used several times in a row. The
+## remainder are read once per round and are spilled to memory. This leaves two
+## registers preserved for the caller.
+##
+## Thus, of the two _vpaes_encrypt_core instances, the first uses (%xmm0-%xmm5)
+## as before. The second uses %xmm6-%xmm8,%xmm11-%xmm13. (Add 6 to %xmm2 and
+## below. Add 8 to %xmm3 and up.) Instructions in the second instance are
+## indented by one space.
+##
+##
+.type _vpaes_encrypt_core_2x,\@abi-omnipotent
+.align 16
+_vpaes_encrypt_core_2x:
+.cfi_startproc
+ mov %rdx, %r9
+ mov \$16, %r11
+ mov 240(%rdx),%eax
+ movdqa %xmm9, %xmm1
+ movdqa %xmm9, %xmm7
+ movdqa .Lk_ipt(%rip), %xmm2 # iptlo
+ movdqa %xmm2, %xmm8
+ pandn %xmm0, %xmm1
+ pandn %xmm6, %xmm7
+ movdqu (%r9), %xmm5 # round0 key
+ # Also use %xmm5 in the second instance.
+ psrld \$4, %xmm1
+ psrld \$4, %xmm7
+ pand %xmm9, %xmm0
+ pand %xmm9, %xmm6
+ pshufb %xmm0, %xmm2
+ pshufb %xmm6, %xmm8
+ movdqa .Lk_ipt+16(%rip), %xmm0 # ipthi
+ movdqa %xmm0, %xmm6
+ pshufb %xmm1, %xmm0
+ pshufb %xmm7, %xmm6
+ pxor %xmm5, %xmm2
+ pxor %xmm5, %xmm8
+ add \$16, %r9
+ pxor %xmm2, %xmm0
+ pxor %xmm8, %xmm6
+ lea .Lk_mc_backward(%rip),%r10
+ jmp .Lenc2x_entry
+
+.align 16
+.Lenc2x_loop:
+ # middle of middle round
+ movdqa .Lk_sb1(%rip), %xmm4 # 4 : sb1u
+ movdqa .Lk_sb1+16(%rip),%xmm0 # 0 : sb1t
+ movdqa %xmm4, %xmm12
+ movdqa %xmm0, %xmm6
+ pshufb %xmm2, %xmm4 # 4 = sb1u
+ pshufb %xmm8, %xmm12
+ pshufb %xmm3, %xmm0 # 0 = sb1t
+ pshufb %xmm11, %xmm6
+ pxor %xmm5, %xmm4 # 4 = sb1u + k
+ pxor %xmm5, %xmm12
+ movdqa .Lk_sb2(%rip), %xmm5 # 4 : sb2u
+ movdqa %xmm5, %xmm13
+ pxor %xmm4, %xmm0 # 0 = A
+ pxor %xmm12, %xmm6
+ movdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
+ # Also use %xmm1 in the second instance.
+ pshufb %xmm2, %xmm5 # 4 = sb2u
+ pshufb %xmm8, %xmm13
+ movdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[]
+ # Also use %xmm4 in the second instance.
+ movdqa .Lk_sb2+16(%rip), %xmm2 # 2 : sb2t
+ movdqa %xmm2, %xmm8
+ pshufb %xmm3, %xmm2 # 2 = sb2t
+ pshufb %xmm11, %xmm8
+ movdqa %xmm0, %xmm3 # 3 = A
+ movdqa %xmm6, %xmm11
+ pxor %xmm5, %xmm2 # 2 = 2A
+ pxor %xmm13, %xmm8
+ pshufb %xmm1, %xmm0 # 0 = B
+ pshufb %xmm1, %xmm6
+ add \$16, %r9 # next key
+ pxor %xmm2, %xmm0 # 0 = 2A+B
+ pxor %xmm8, %xmm6
+ pshufb %xmm4, %xmm3 # 3 = D
+ pshufb %xmm4, %xmm11
+ add \$16, %r11 # next mc
+ pxor %xmm0, %xmm3 # 3 = 2A+B+D
+ pxor %xmm6, %xmm11
+ pshufb %xmm1, %xmm0 # 0 = 2B+C
+ pshufb %xmm1, %xmm6
+ and \$0x30, %r11 # ... mod 4
+ sub \$1,%rax # nr--
+ pxor %xmm3, %xmm0 # 0 = 2A+3B+C+D
+ pxor %xmm11, %xmm6
+
+.Lenc2x_entry:
+ # top of round
+ movdqa %xmm9, %xmm1 # 1 : i
+ movdqa %xmm9, %xmm7
+ movdqa .Lk_inv+16(%rip), %xmm5 # 2 : a/k
+ movdqa %xmm5, %xmm13
+ pandn %xmm0, %xmm1 # 1 = i<<4
+ pandn %xmm6, %xmm7
+ psrld \$4, %xmm1 # 1 = i
+ psrld \$4, %xmm7
+ pand %xmm9, %xmm0 # 0 = k
+ pand %xmm9, %xmm6
+ pshufb %xmm0, %xmm5 # 2 = a/k
+ pshufb %xmm6, %xmm13
+ movdqa %xmm10, %xmm3 # 3 : 1/i
+ movdqa %xmm10, %xmm11
+ pxor %xmm1, %xmm0 # 0 = j
+ pxor %xmm7, %xmm6
+ pshufb %xmm1, %xmm3 # 3 = 1/i
+ pshufb %xmm7, %xmm11
+ movdqa %xmm10, %xmm4 # 4 : 1/j
+ movdqa %xmm10, %xmm12
+ pxor %xmm5, %xmm3 # 3 = iak = 1/i + a/k
+ pxor %xmm13, %xmm11
+ pshufb %xmm0, %xmm4 # 4 = 1/j
+ pshufb %xmm6, %xmm12
+ movdqa %xmm10, %xmm2 # 2 : 1/iak
+ movdqa %xmm10, %xmm8
+ pxor %xmm5, %xmm4 # 4 = jak = 1/j + a/k
+ pxor %xmm13, %xmm12
+ pshufb %xmm3, %xmm2 # 2 = 1/iak
+ pshufb %xmm11, %xmm8
+ movdqa %xmm10, %xmm3 # 3 : 1/jak
+ movdqa %xmm10, %xmm11
+ pxor %xmm0, %xmm2 # 2 = io
+ pxor %xmm6, %xmm8
+ pshufb %xmm4, %xmm3 # 3 = 1/jak
+ pshufb %xmm12, %xmm11
+ movdqu (%r9), %xmm5
+ # Also use %xmm5 in the second instance.
+ pxor %xmm1, %xmm3 # 3 = jo
+ pxor %xmm7, %xmm11
+ jnz .Lenc2x_loop
+
+ # middle of last round
+ movdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo
+ movdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16
+ movdqa %xmm4, %xmm12
+ movdqa %xmm0, %xmm6
+ pshufb %xmm2, %xmm4 # 4 = sbou
+ pshufb %xmm8, %xmm12
+ pxor %xmm5, %xmm4 # 4 = sb1u + k
+ pxor %xmm5, %xmm12
+ pshufb %xmm3, %xmm0 # 0 = sb1t
+ pshufb %xmm11, %xmm6
+ movdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[]
+ # Also use %xmm1 in the second instance.
+ pxor %xmm4, %xmm0 # 0 = A
+ pxor %xmm12, %xmm6
+ pshufb %xmm1, %xmm0
+ pshufb %xmm1, %xmm6
+ ret
+.cfi_endproc
+.size _vpaes_encrypt_core_2x,.-_vpaes_encrypt_core_2x
+
+##
## Decryption core
##
## Same API as encryption core.
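
The comment above describes how two instances of _vpaes_encrypt_core are stitched together to fill the sixteen XMM registers. The register allocation is specific to SSSE3, but the control structure is simply the same sequence of round operations issued for two independent states back to back, as in this scalar C toy (the rotate/XOR round here is a placeholder, not AES):

#include <stdint.h>

// Two independent states advance through the same round body; the second
// instance's lines are indented by one space, mirroring the convention the
// assembly uses.
static void toy_round_2x(uint32_t *a, uint32_t *b, uint32_t rk0, uint32_t rk1) {
  uint32_t s0 = *a;
   uint32_t s1 = *b;
  s0 ^= rk0;
   s1 ^= rk1;
  s0 = (s0 << 5) | (s0 >> 27);
   s1 = (s1 << 5) | (s1 >> 27);
  *a = s0;
   *b = s1;
}
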
@@ -984,6 +1159,111 @@ $code.=<<___;
.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
___
}
+{
+my ($inp,$out,$blocks,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx","%r8");
+# void vpaes_ctr32_encrypt_blocks(const uint8_t *inp, uint8_t *out,
+# size_t blocks, const AES_KEY *key,
+# const uint8_t ivp[16]);
+$code.=<<___;
+.globl ${PREFIX}_ctr32_encrypt_blocks
+.type ${PREFIX}_ctr32_encrypt_blocks,\@function,5
+.align 16
+${PREFIX}_ctr32_encrypt_blocks:
+.cfi_startproc
+ # _vpaes_encrypt_core and _vpaes_encrypt_core_2x expect the key in %rdx.
+ xchg $key, $blocks
+___
+($blocks,$key)=($key,$blocks);
+$code.=<<___;
+ test $blocks, $blocks
+ jz .Lctr32_abort
+___
+$code.=<<___ if ($win64);
+ lea -0xb8(%rsp),%rsp
+ movaps %xmm6,0x10(%rsp)
+ movaps %xmm7,0x20(%rsp)
+ movaps %xmm8,0x30(%rsp)
+ movaps %xmm9,0x40(%rsp)
+ movaps %xmm10,0x50(%rsp)
+ movaps %xmm11,0x60(%rsp)
+ movaps %xmm12,0x70(%rsp)
+ movaps %xmm13,0x80(%rsp)
+ movaps %xmm14,0x90(%rsp)
+ movaps %xmm15,0xa0(%rsp)
+.Lctr32_body:
+___
+$code.=<<___;
+ movdqu ($ivp), %xmm0 # Load IV.
+ movdqa .Lctr_add_one(%rip), %xmm8
+ sub $inp, $out # This allows only incrementing $inp.
+ call _vpaes_preheat
+ movdqa %xmm0, %xmm6
+ pshufb .Lrev_ctr(%rip), %xmm6
+
+ test \$1, $blocks
+ jz .Lctr32_prep_loop
+
+ # Handle one block so the remaining block count is even for
+ # _vpaes_encrypt_core_2x.
+ movdqu ($inp), %xmm7 # Load input.
+ call _vpaes_encrypt_core
+ pxor %xmm7, %xmm0
+ paddd %xmm8, %xmm6
+ movdqu %xmm0, ($out,$inp)
+ sub \$1, $blocks
+ lea 16($inp), $inp
+ jz .Lctr32_done
+
+.Lctr32_prep_loop:
+ # _vpaes_encrypt_core_2x leaves only %xmm14 and %xmm15 as spare
+ # registers. We maintain two byte-swapped counters in them.
+ movdqa %xmm6, %xmm14
+ movdqa %xmm6, %xmm15
+ paddd %xmm8, %xmm15
+
+.Lctr32_loop:
+ movdqa .Lrev_ctr(%rip), %xmm1 # Set up counters.
+ movdqa %xmm14, %xmm0
+ movdqa %xmm15, %xmm6
+ pshufb %xmm1, %xmm0
+ pshufb %xmm1, %xmm6
+ call _vpaes_encrypt_core_2x
+ movdqu ($inp), %xmm1 # Load input.
+ movdqu 16($inp), %xmm2
+ movdqa .Lctr_add_two(%rip), %xmm3
+ pxor %xmm1, %xmm0 # XOR input.
+ pxor %xmm2, %xmm6
+ paddd %xmm3, %xmm14 # Increment counters.
+ paddd %xmm3, %xmm15
+ movdqu %xmm0, ($out,$inp) # Write output.
+ movdqu %xmm6, 16($out,$inp)
+ sub \$2, $blocks # Advance loop.
+ lea 32($inp), $inp
+ jnz .Lctr32_loop
+
+.Lctr32_done:
+___
+$code.=<<___ if ($win64);
+ movaps 0x10(%rsp),%xmm6
+ movaps 0x20(%rsp),%xmm7
+ movaps 0x30(%rsp),%xmm8
+ movaps 0x40(%rsp),%xmm9
+ movaps 0x50(%rsp),%xmm10
+ movaps 0x60(%rsp),%xmm11
+ movaps 0x70(%rsp),%xmm12
+ movaps 0x80(%rsp),%xmm13
+ movaps 0x90(%rsp),%xmm14
+ movaps 0xa0(%rsp),%xmm15
+ lea 0xb8(%rsp),%rsp
+.Lctr32_epilogue:
+___
+$code.=<<___;
+.Lctr32_abort:
+ ret
+.cfi_endproc
+.size ${PREFIX}_ctr32_encrypt_blocks,.-${PREFIX}_ctr32_encrypt_blocks
+___
+}
$code.=<<___;
##
## _aes_preheat
@@ -1107,6 +1387,17 @@ _vpaes_consts:
.Lk_dsbo: # decryption sbox final output
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+
+# .Lrev_ctr is a permutation which byte-swaps the counter portion of the IV.
+.Lrev_ctr:
+ .quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+# .Lctr_add_* may be added to a byte-swapped xmm register to increment the
+# counter. The register must be byte-swapped again to form the actual input.
+.Lctr_add_one:
+ .quad 0x0000000000000000, 0x0000000100000000
+.Lctr_add_two:
+ .quad 0x0000000000000000, 0x0000000200000000
+
.asciz "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
.align 64
.size _vpaes_consts,.-_vpaes_consts
@@ -1222,6 +1513,10 @@ se_handler:
.rva .LSEH_end_${PREFIX}_cbc_encrypt
.rva .LSEH_info_${PREFIX}_cbc_encrypt
+ .rva .LSEH_begin_${PREFIX}_ctr32_encrypt_blocks
+ .rva .LSEH_end_${PREFIX}_ctr32_encrypt_blocks
+ .rva .LSEH_info_${PREFIX}_ctr32_encrypt_blocks
+
.section .xdata
.align 8
.LSEH_info_${PREFIX}_set_encrypt_key:
@@ -1244,6 +1539,10 @@ se_handler:
.byte 9,0,0,0
.rva se_handler
.rva .Lcbc_body,.Lcbc_epilogue # HandlerData[]
+.LSEH_info_${PREFIX}_ctr32_encrypt_blocks:
+ .byte 9,0,0,0
+ .rva se_handler
+ .rva .Lctr32_body,.Lctr32_epilogue # HandlerData[]
___
}
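
The counter constants above follow the usual CTR32 convention: only the last 32 bits of the IV act as a counter, stored big-endian, while the first 96 bits stay fixed. Byte-swapping once with .Lrev_ctr lets paddd (which adds little-endian 32-bit lanes) increment the counter cheaply, and the value is swapped back before each encryption. A minimal C model of that behaviour, offered only as a sketch and not part of the patch (the helper name is hypothetical):

#include <stdint.h>

/* ctr32_increment: hypothetical reference model of the .Lrev_ctr/.Lctr_add_one
 * sequence; increments the last 32 bits of the IV as a big-endian counter. */
static void ctr32_increment(uint8_t ivec[16]) {
  uint32_t ctr = ((uint32_t)ivec[12] << 24) | ((uint32_t)ivec[13] << 16) |
                 ((uint32_t)ivec[14] << 8) | (uint32_t)ivec[15];
  ctr++;  /* wraps modulo 2^32; the first 12 bytes of the IV are untouched */
  ivec[12] = (uint8_t)(ctr >> 24);
  ivec[13] = (uint8_t)(ctr >> 16);
  ivec[14] = (uint8_t)(ctr >> 8);
  ivec[15] = (uint8_t)ctr;
}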
diff --git a/src/crypto/fipsmodule/aes/internal.h b/src/crypto/fipsmodule/aes/internal.h
index 63070bc6..0cebb04c 100644
--- a/src/crypto/fipsmodule/aes/internal.h
+++ b/src/crypto/fipsmodule/aes/internal.h
@@ -35,15 +35,13 @@ OPENSSL_INLINE int hwaes_capable(void) {
}
#define VPAES
+#if defined(OPENSSL_X86_64)
+#define VPAES_CTR32
+#endif
OPENSSL_INLINE int vpaes_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << (41 - 32))) != 0;
}
-#if defined(OPENSSL_X86_64)
-#define BSAES
-OPENSSL_INLINE int bsaes_capable(void) { return vpaes_capable(); }
-#endif // X86_64
-
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#define HWAES
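
The vpaes_capable() test above reads feature bit 41, i.e. bit 9 of word 1 of the ia32cap vector. Assuming word 1 mirrors CPUID.1:ECX (as the (41 - 32) arithmetic suggests), that is the SSSE3 flag, which vpaes needs for pshufb. Written out as a sketch (the helper name is illustrative, not from this change):

/* Illustrative expansion of the capability check: SSSE3 is CPUID.1:ECX bit 9,
 * i.e. feature bit 41 overall. Assumes the OPENSSL_ia32cap_get() accessor from
 * crypto/internal.h is available. */
static int has_ssse3(void) {
  return (OPENSSL_ia32cap_get()[1] & (1u << 9)) != 0;
}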
diff --git a/src/crypto/fipsmodule/bn/ctx.c b/src/crypto/fipsmodule/bn/ctx.c
index af50de93..1926e80b 100644
--- a/src/crypto/fipsmodule/bn/ctx.c
+++ b/src/crypto/fipsmodule/bn/ctx.c
@@ -54,6 +54,7 @@
#include <openssl/bn.h>
+#include <assert.h>
#include <string.h>
#include <openssl/err.h>
@@ -62,63 +63,46 @@
#include "../../internal.h"
-// How many bignums are in each "pool item";
-#define BN_CTX_POOL_SIZE 16
// The stack frame index array grows on demand; this is its first-time expansion size.
#define BN_CTX_START_FRAMES 32
-// A bundle of bignums that can be linked with other bundles
-typedef struct bignum_pool_item {
- // The bignum values
- BIGNUM vals[BN_CTX_POOL_SIZE];
- // Linked-list admin
- struct bignum_pool_item *prev, *next;
-} BN_POOL_ITEM;
-
-
-typedef struct bignum_pool {
- // Linked-list admin
- BN_POOL_ITEM *head, *current, *tail;
- // Stack depth and allocation size
- unsigned used, size;
-} BN_POOL;
-
-static void BN_POOL_init(BN_POOL *);
-static void BN_POOL_finish(BN_POOL *);
-static BIGNUM *BN_POOL_get(BN_POOL *);
-static void BN_POOL_release(BN_POOL *, unsigned int);
-
// BN_STACK
-// A wrapper to manage the "stack frames"
-typedef struct bignum_ctx_stack {
- // Array of indexes into the bignum stack
- unsigned int *indexes;
+// A |BN_STACK| is a stack of |size_t| values.
+typedef struct {
+ // Array of indexes into |ctx->bignums|.
+ size_t *indexes;
// Number of stack frames, and the size of the allocated array
- unsigned int depth, size;
+ size_t depth, size;
} BN_STACK;
-static void BN_STACK_init(BN_STACK *);
-static void BN_STACK_finish(BN_STACK *);
-static int BN_STACK_push(BN_STACK *, unsigned int);
-static unsigned int BN_STACK_pop(BN_STACK *);
+static void BN_STACK_init(BN_STACK *);
+static void BN_STACK_cleanup(BN_STACK *);
+static int BN_STACK_push(BN_STACK *, size_t idx);
+static size_t BN_STACK_pop(BN_STACK *);
// BN_CTX
+DEFINE_STACK_OF(BIGNUM)
+
// The opaque BN_CTX type
struct bignum_ctx {
- // The bignum bundles
- BN_POOL pool;
- // The "stack frames", if you will
+ // bignums is the stack of |BIGNUM|s managed by this |BN_CTX|.
+ STACK_OF(BIGNUM) *bignums;
+ // stack is the stack of |BN_CTX_start| frames. It is the value of |used| at
+ // the time |BN_CTX_start| was called.
BN_STACK stack;
- // The number of bignums currently assigned
- unsigned int used;
- // Depth of stack overflow
- int err_stack;
- // Block "gets" until an "end" (compatibility behaviour)
- int too_many;
+ // used is the number of |BIGNUM|s from |bignums| that have been used.
+ size_t used;
+ // error is one if any operation on this |BN_CTX| failed. All subsequent
+ // operations will fail.
+ char error;
+ // defer_error is one if an operation on this |BN_CTX| has failed, but no
+ // error has been pushed to the queue yet. This is used to defer errors from
+ // |BN_CTX_start| to |BN_CTX_get|.
+ char defer_error;
};
BN_CTX *BN_CTX_new(void) {
@@ -129,11 +113,11 @@ BN_CTX *BN_CTX_new(void) {
}
// Initialise the structure
- BN_POOL_init(&ret->pool);
+ ret->bignums = NULL;
BN_STACK_init(&ret->stack);
ret->used = 0;
- ret->err_stack = 0;
- ret->too_many = 0;
+ ret->error = 0;
+ ret->defer_error = 0;
return ret;
}
@@ -142,57 +126,69 @@ void BN_CTX_free(BN_CTX *ctx) {
return;
}
- BN_STACK_finish(&ctx->stack);
- BN_POOL_finish(&ctx->pool);
+ sk_BIGNUM_pop_free(ctx->bignums, BN_free);
+ BN_STACK_cleanup(&ctx->stack);
OPENSSL_free(ctx);
}
void BN_CTX_start(BN_CTX *ctx) {
- // If we're already overflowing ...
- if (ctx->err_stack || ctx->too_many) {
- ctx->err_stack++;
- } else if (!BN_STACK_push(&ctx->stack, ctx->used)) {
- // (Try to) get a new frame pointer
- OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
- ctx->err_stack++;
+ if (ctx->error) {
+ // Once an operation has failed, |ctx->stack| no longer matches the number
+ // of |BN_CTX_end| calls to come. Do nothing.
+ return;
+ }
+
+ if (!BN_STACK_push(&ctx->stack, ctx->used)) {
+ ctx->error = 1;
+ // |BN_CTX_start| cannot fail, so defer the error to |BN_CTX_get|.
+ ctx->defer_error = 1;
}
}
BIGNUM *BN_CTX_get(BN_CTX *ctx) {
- BIGNUM *ret;
- if (ctx->err_stack || ctx->too_many) {
+ // Once any operation has failed, they all do.
+ if (ctx->error) {
+ if (ctx->defer_error) {
+ OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
+ ctx->defer_error = 0;
+ }
return NULL;
}
- ret = BN_POOL_get(&ctx->pool);
- if (ret == NULL) {
- // Setting too_many prevents repeated "get" attempts from
- // cluttering the error stack.
- ctx->too_many = 1;
- OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
- return NULL;
+ if (ctx->bignums == NULL) {
+ ctx->bignums = sk_BIGNUM_new_null();
+ if (ctx->bignums == NULL) {
+ OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
+ ctx->error = 1;
+ return NULL;
+ }
}
- // OK, make sure the returned bignum is "zero"
+ if (ctx->used == sk_BIGNUM_num(ctx->bignums)) {
+ BIGNUM *bn = BN_new();
+ if (bn == NULL || !sk_BIGNUM_push(ctx->bignums, bn)) {
+ OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
+ BN_free(bn);
+ ctx->error = 1;
+ return NULL;
+ }
+ }
+
+ BIGNUM *ret = sk_BIGNUM_value(ctx->bignums, ctx->used);
BN_zero(ret);
+ // This is bounded by |sk_BIGNUM_num|, so it cannot overflow.
ctx->used++;
return ret;
}
void BN_CTX_end(BN_CTX *ctx) {
- if (ctx->err_stack) {
- ctx->err_stack--;
- } else {
- unsigned int fp = BN_STACK_pop(&ctx->stack);
- // Does this stack frame have anything to release?
- if (fp < ctx->used) {
- BN_POOL_release(&ctx->pool, ctx->used - fp);
- }
-
- ctx->used = fp;
- // Unjam "too_many" in case "get" had failed
- ctx->too_many = 0;
+ if (ctx->error) {
+ // Once an operation has failed, |ctx->stack| no longer matches the number
+ // of |BN_CTX_end| calls to come. Do nothing.
+ return;
}
+
+ ctx->used = BN_STACK_pop(&ctx->stack);
}
@@ -203,101 +199,34 @@ static void BN_STACK_init(BN_STACK *st) {
st->depth = st->size = 0;
}
-static void BN_STACK_finish(BN_STACK *st) {
+static void BN_STACK_cleanup(BN_STACK *st) {
OPENSSL_free(st->indexes);
}
-static int BN_STACK_push(BN_STACK *st, unsigned int idx) {
+static int BN_STACK_push(BN_STACK *st, size_t idx) {
if (st->depth == st->size) {
- // Need to expand
- unsigned int newsize =
- (st->size ? (st->size * 3 / 2) : BN_CTX_START_FRAMES);
- unsigned int *newitems = OPENSSL_malloc(newsize * sizeof(unsigned int));
- if (!newitems) {
+ // This function intentionally does not push to the error queue on error.
+ // Error-reporting is deferred to |BN_CTX_get|.
+ size_t new_size = st->size != 0 ? st->size * 3 / 2 : BN_CTX_START_FRAMES;
+ if (new_size <= st->size || new_size > ((size_t)-1) / sizeof(size_t)) {
return 0;
}
- if (st->depth) {
- OPENSSL_memcpy(newitems, st->indexes, st->depth * sizeof(unsigned int));
+ size_t *new_indexes =
+ OPENSSL_realloc(st->indexes, new_size * sizeof(size_t));
+ if (new_indexes == NULL) {
+ return 0;
}
- OPENSSL_free(st->indexes);
- st->indexes = newitems;
- st->size = newsize;
+ st->indexes = new_indexes;
+ st->size = new_size;
}
- st->indexes[(st->depth)++] = idx;
+ st->indexes[st->depth] = idx;
+ st->depth++;
return 1;
}
-static unsigned int BN_STACK_pop(BN_STACK *st) {
- return st->indexes[--(st->depth)];
-}
-
-
-static void BN_POOL_init(BN_POOL *p) {
- p->head = p->current = p->tail = NULL;
- p->used = p->size = 0;
-}
-
-static void BN_POOL_finish(BN_POOL *p) {
- while (p->head) {
- for (size_t i = 0; i < BN_CTX_POOL_SIZE; i++) {
- BN_clear_free(&p->head->vals[i]);
- }
-
- p->current = p->head->next;
- OPENSSL_free(p->head);
- p->head = p->current;
- }
-}
-
-static BIGNUM *BN_POOL_get(BN_POOL *p) {
- if (p->used == p->size) {
- BN_POOL_ITEM *item = OPENSSL_malloc(sizeof(BN_POOL_ITEM));
- if (!item) {
- return NULL;
- }
-
- // Initialise the structure
- for (size_t i = 0; i < BN_CTX_POOL_SIZE; i++) {
- BN_init(&item->vals[i]);
- }
-
- item->prev = p->tail;
- item->next = NULL;
- // Link it in
- if (!p->head) {
- p->head = p->current = p->tail = item;
- } else {
- p->tail->next = item;
- p->tail = item;
- p->current = item;
- }
-
- p->size += BN_CTX_POOL_SIZE;
- p->used++;
- // Return the first bignum from the new pool
- return item->vals;
- }
-
- if (!p->used) {
- p->current = p->head;
- } else if ((p->used % BN_CTX_POOL_SIZE) == 0) {
- p->current = p->current->next;
- }
-
- return p->current->vals + ((p->used++) % BN_CTX_POOL_SIZE);
-}
-
-static void BN_POOL_release(BN_POOL *p, unsigned int num) {
- unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE;
- p->used -= num;
-
- while (num--) {
- if (!offset) {
- offset = BN_CTX_POOL_SIZE - 1;
- p->current = p->current->prev;
- } else {
- offset--;
- }
- }
+static size_t BN_STACK_pop(BN_STACK *st) {
+ assert(st->depth > 0);
+ st->depth--;
+ return st->indexes[st->depth];
}
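
The new |error| and |defer_error| fields support the standard calling pattern: BN_CTX_start cannot report failure, so the first BN_CTX_get after a failed start returns NULL and pushes the deferred error. A usage sketch, not part of this change (function and variable names are illustrative):

#include <openssl/bn.h>

/* add_then_reduce: illustrative BN_CTX caller. A failed BN_CTX_start shows up
 * as the first BN_CTX_get returning NULL; BN_CTX_end then unwinds |used| via
 * the BN_STACK frame pushed (or skipped) above. */
static int add_then_reduce(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                           const BIGNUM *m, BN_CTX *ctx) {
  int ok = 0;
  BN_CTX_start(ctx);
  BIGNUM *tmp = BN_CTX_get(ctx);
  if (tmp == NULL ||  /* also reports a deferred BN_CTX_start failure */
      !BN_add(tmp, a, b) ||
      !BN_nnmod(r, tmp, m, ctx)) {
    goto err;
  }
  ok = 1;

err:
  BN_CTX_end(ctx);
  return ok;
}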
diff --git a/src/crypto/fipsmodule/bn/exponentiation.c b/src/crypto/fipsmodule/bn/exponentiation.c
index 9e408113..8d4a5c8b 100644
--- a/src/crypto/fipsmodule/bn/exponentiation.c
+++ b/src/crypto/fipsmodule/bn/exponentiation.c
@@ -614,10 +614,9 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
BN_MONT_CTX *new_mont = NULL;
BN_CTX_start(ctx);
- BIGNUM *d = BN_CTX_get(ctx);
BIGNUM *r = BN_CTX_get(ctx);
val[0] = BN_CTX_get(ctx);
- if (!d || !r || !val[0]) {
+ if (r == NULL || val[0] == NULL) {
goto err;
}
@@ -639,7 +638,9 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
goto err;
}
if (window > 1) {
- if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
+ BIGNUM *d = BN_CTX_get(ctx);
+ if (d == NULL ||
+ !BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
goto err;
}
for (int i = 1; i < 1 << (window - 1); i++) {
diff --git a/src/crypto/fipsmodule/cipher/e_aes.c b/src/crypto/fipsmodule/cipher/e_aes.c
index a1859d74..dc94166c 100644
--- a/src/crypto/fipsmodule/cipher/e_aes.c
+++ b/src/crypto/fipsmodule/cipher/e_aes.c
@@ -230,7 +230,7 @@ static int aes_ofb_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in,
ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_KEY *gcm_key,
block128_f *out_block, const uint8_t *key,
- size_t key_bytes, int large_inputs) {
+ size_t key_bytes) {
if (hwaes_capable()) {
aes_hw_set_encrypt_key(key, key_bytes * 8, aes_key);
if (gcm_key != NULL) {
@@ -242,9 +242,7 @@ ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_KEY *gcm_key,
return aes_hw_ctr32_encrypt_blocks;
}
- const int bsaes_ok = bsaes_capable();
- const int vpaes_ok = vpaes_capable();
- if (bsaes_ok && (large_inputs || !vpaes_ok)) {
+ if (bsaes_capable()) {
aes_nohw_set_encrypt_key(key, key_bytes * 8, aes_key);
if (gcm_key != NULL) {
CRYPTO_gcm128_init_key(gcm_key, aes_key, aes_nohw_encrypt, 0);
@@ -255,7 +253,7 @@ ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_KEY *gcm_key,
return bsaes_ctr32_encrypt_blocks;
}
- if (vpaes_ok) {
+ if (vpaes_capable()) {
vpaes_set_encrypt_key(key, key_bytes * 8, aes_key);
if (out_block) {
*out_block = vpaes_encrypt;
@@ -317,7 +315,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key,
if (key) {
OPENSSL_memset(&gctx->gcm, 0, sizeof(gctx->gcm));
gctx->ctr = aes_ctr_set_key(&gctx->ks.ks, &gctx->gcm.gcm_key, NULL, key,
- ctx->key_len, 1 /* large inputs */);
+ ctx->key_len);
      // If we have an IV, set it directly; otherwise use the saved IV.
if (iv == NULL && gctx->iv_set) {
iv = gctx->iv;
@@ -860,8 +858,8 @@ static int aead_aes_gcm_init_impl(struct aead_aes_gcm_ctx *gcm_ctx,
return 0;
}
- gcm_ctx->ctr = aes_ctr_set_key(&gcm_ctx->ks.ks, &gcm_ctx->gcm_key, NULL, key,
- key_len, 1 /* large inputs */);
+ gcm_ctx->ctr =
+ aes_ctr_set_key(&gcm_ctx->ks.ks, &gcm_ctx->gcm_key, NULL, key, key_len);
*out_tag_len = tag_len;
return 1;
}
diff --git a/src/crypto/fipsmodule/cipher/internal.h b/src/crypto/fipsmodule/cipher/internal.h
index b9e61ec8..68efe33d 100644
--- a/src/crypto/fipsmodule/cipher/internal.h
+++ b/src/crypto/fipsmodule/cipher/internal.h
@@ -117,11 +117,9 @@ struct evp_aead_st {
// set to a function that encrypts single blocks. If not NULL, |*gcm_key| is
// initialised to do GHASH with the given key. It returns a function for
// optimised CTR-mode, or NULL if CTR-mode should be built using |*out_block|.
-// |large_input| is a hint to select AES implementations. If it is one, the
-// caller expects this key to be used with large inputs.
ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_KEY *gcm_key,
block128_f *out_block, const uint8_t *key,
- size_t key_bytes, int large_input);
+ size_t key_bytes);
#if defined(__cplusplus)
} // extern C
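
With the |large_input| hint gone, the aes_ctr_set_key contract is simply: set up the key, then either return a fused CTR function or leave the caller to build CTR mode from |*out_block|. A caller-side sketch, not part of the patch (the function name and the AES-256 key size are illustrative):

/* encrypt_ctr_blocks: illustrative caller of aes_ctr_set_key(). */
static void encrypt_ctr_blocks(uint8_t *out, const uint8_t *in, size_t blocks,
                               const uint8_t key[32], const uint8_t ivec[16]) {
  AES_KEY ks;
  block128_f block;
  ctr128_f ctr = aes_ctr_set_key(&ks, /*gcm_key=*/NULL, &block, key, 32);
  if (ctr != NULL) {
    ctr(in, out, blocks, &ks, ivec);  /* optimised whole-block CTR path */
  } else {
    /* Build CTR mode from |block| instead, e.g. with CRYPTO_ctr128_encrypt. */
  }
}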
diff --git a/src/crypto/fipsmodule/rand/ctrdrbg.c b/src/crypto/fipsmodule/rand/ctrdrbg.c
index 418f56b6..b2fda1da 100644
--- a/src/crypto/fipsmodule/rand/ctrdrbg.c
+++ b/src/crypto/fipsmodule/rand/ctrdrbg.c
@@ -57,12 +57,7 @@ int CTR_DRBG_init(CTR_DRBG_STATE *drbg,
seed_material[i] ^= kInitMask[i];
}
- // |RAND_bytes| is rarely called with large enough inputs for bsaes to be
- // faster than vpaes. bsaes also currently has side channel trade offs
- // (https://crbug.com/boringssl/256), which we should especially avoid in the
- // PRNG. (Note the size hint is a no-op on machines with AES instructions.)
- drbg->ctr = aes_ctr_set_key(&drbg->ks, NULL, &drbg->block, seed_material, 32,
- 0 /* small inputs */);
+ drbg->ctr = aes_ctr_set_key(&drbg->ks, NULL, &drbg->block, seed_material, 32);
OPENSSL_memcpy(drbg->counter.bytes, seed_material + 32, 16);
drbg->reseed_counter = 1;
@@ -98,8 +93,7 @@ static int ctr_drbg_update(CTR_DRBG_STATE *drbg, const uint8_t *data,
temp[i] ^= data[i];
}
- drbg->ctr = aes_ctr_set_key(&drbg->ks, NULL, &drbg->block, temp, 32,
- 0 /* small inputs */);
+ drbg->ctr = aes_ctr_set_key(&drbg->ks, NULL, &drbg->block, temp, 32);
OPENSSL_memcpy(drbg->counter.bytes, temp + 32, 16);
return 1;
diff --git a/src/crypto/impl_dispatch_test.cc b/src/crypto/impl_dispatch_test.cc
index f1192a7e..54ee704c 100644
--- a/src/crypto/impl_dispatch_test.cc
+++ b/src/crypto/impl_dispatch_test.cc
@@ -89,7 +89,6 @@ constexpr size_t kFlag_aesni_gcm_encrypt = 2;
constexpr size_t kFlag_aes_hw_set_encrypt_key = 3;
constexpr size_t kFlag_vpaes_encrypt = 4;
constexpr size_t kFlag_vpaes_set_encrypt_key = 5;
-constexpr size_t kFlag_bsaes_ctr32_encrypt_blocks = 6;
TEST_F(ImplDispatchTest, AEAD_AES_GCM) {
AssertFunctionsHit(
@@ -98,9 +97,8 @@ TEST_F(ImplDispatchTest, AEAD_AES_GCM) {
{kFlag_aes_hw_encrypt, aesni_},
{kFlag_aes_hw_set_encrypt_key, aesni_},
{kFlag_aesni_gcm_encrypt, is_x86_64_ && aesni_ && avx_movbe_},
- {kFlag_vpaes_encrypt, !is_x86_64_ && ssse3_ && !aesni_},
- {kFlag_vpaes_set_encrypt_key, !is_x86_64_ && ssse3_ && !aesni_},
- {kFlag_bsaes_ctr32_encrypt_blocks, is_x86_64_ && ssse3_ && !aesni_},
+ {kFlag_vpaes_encrypt, ssse3_ && !aesni_},
+ {kFlag_vpaes_set_encrypt_key, ssse3_ && !aesni_},
},
[] {
const uint8_t kZeros[16] = {0};
@@ -123,7 +121,6 @@ TEST_F(ImplDispatchTest, AES_set_encrypt_key) {
{
{kFlag_aes_hw_set_encrypt_key, aesni_},
{kFlag_vpaes_set_encrypt_key, ssse3_ && !aesni_},
- // BSAES will not be used for the |AES_*| functions.
},
[] {
AES_KEY key;
@@ -141,7 +138,6 @@ TEST_F(ImplDispatchTest, AES_single_block) {
{
{kFlag_aes_hw_encrypt, aesni_},
{kFlag_vpaes_encrypt, ssse3_ && !aesni_},
- // BSAES will not be used for the |AES_*| functions.
},
[&key] {
uint8_t in[AES_BLOCK_SIZE] = {0};
diff --git a/src/include/openssl/asn1.h b/src/include/openssl/asn1.h
index 8b61eaa3..6ae831b8 100644
--- a/src/include/openssl/asn1.h
+++ b/src/include/openssl/asn1.h
@@ -666,7 +666,6 @@ OPENSSL_EXPORT int d2i_ASN1_BOOLEAN(int *a,const unsigned char **pp,long lengt
DECLARE_ASN1_FUNCTIONS(ASN1_INTEGER)
OPENSSL_EXPORT int i2c_ASN1_INTEGER(ASN1_INTEGER *a,unsigned char **pp);
OPENSSL_EXPORT ASN1_INTEGER *c2i_ASN1_INTEGER(ASN1_INTEGER **a,const unsigned char **pp, long length);
-OPENSSL_EXPORT ASN1_INTEGER *d2i_ASN1_UINTEGER(ASN1_INTEGER **a,const unsigned char **pp, long length);
OPENSSL_EXPORT ASN1_INTEGER * ASN1_INTEGER_dup(const ASN1_INTEGER *x);
OPENSSL_EXPORT int ASN1_INTEGER_cmp(const ASN1_INTEGER *x, const ASN1_INTEGER *y);
diff --git a/src/tool/speed.cc b/src/tool/speed.cc
index 14379cd4..a0fc905d 100644
--- a/src/tool/speed.cc
+++ b/src/tool/speed.cc
@@ -99,7 +99,7 @@ static uint64_t time_now() {
#endif
static uint64_t g_timeout_seconds = 1;
-static std::vector<size_t> g_chunk_lengths = {16, 256, 1350, 8192};
+static std::vector<size_t> g_chunk_lengths = {16, 256, 1350, 8192, 16384};
static bool TimeFunction(TimeResults *results, std::function<bool()> func) {
// total_us is the total amount of time that we'll aim to measure a function
@@ -846,7 +846,7 @@ static const struct argument kArguments[] = {
"-chunks",
kOptionalArgument,
"A comma-separated list of input sizes to run tests at (default is "
- "16,256,1350,8192)",
+ "16,256,1350,8192,16384)",
},
{
"",
diff --git a/win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm b/win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm
deleted file mode 100644
index 5fa4053e..00000000
--- a/win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm
+++ /dev/null
@@ -1,1777 +0,0 @@
-; This file is generated from a similarly-named Perl script in the BoringSSL
-; source tree. Do not edit by hand.
-
-default rel
-%define XMMWORD
-%define YMMWORD
-%define ZMMWORD
-
-%ifdef BORINGSSL_PREFIX
-%include "boringssl_prefix_symbols_nasm.inc"
-%endif
-section .text code align=64
-
-
-
-ALIGN 64
-_bsaes_encrypt8:
-
- lea r11,[$L$BS0]
-
- movdqa xmm8,XMMWORD[rax]
- lea rax,[16+rax]
- movdqa xmm7,XMMWORD[80+r11]
- pxor xmm15,xmm8
- pxor xmm0,xmm8
- pxor xmm1,xmm8
- pxor xmm2,xmm8
-DB 102,68,15,56,0,255
-DB 102,15,56,0,199
- pxor xmm3,xmm8
- pxor xmm4,xmm8
-DB 102,15,56,0,207
-DB 102,15,56,0,215
- pxor xmm5,xmm8
- pxor xmm6,xmm8
-DB 102,15,56,0,223
-DB 102,15,56,0,231
-DB 102,15,56,0,239
-DB 102,15,56,0,247
-_bsaes_encrypt8_bitslice:
- movdqa xmm7,XMMWORD[r11]
- movdqa xmm8,XMMWORD[16+r11]
- movdqa xmm9,xmm5
- psrlq xmm5,1
- movdqa xmm10,xmm3
- psrlq xmm3,1
- pxor xmm5,xmm6
- pxor xmm3,xmm4
- pand xmm5,xmm7
- pand xmm3,xmm7
- pxor xmm6,xmm5
- psllq xmm5,1
- pxor xmm4,xmm3
- psllq xmm3,1
- pxor xmm5,xmm9
- pxor xmm3,xmm10
- movdqa xmm9,xmm1
- psrlq xmm1,1
- movdqa xmm10,xmm15
- psrlq xmm15,1
- pxor xmm1,xmm2
- pxor xmm15,xmm0
- pand xmm1,xmm7
- pand xmm15,xmm7
- pxor xmm2,xmm1
- psllq xmm1,1
- pxor xmm0,xmm15
- psllq xmm15,1
- pxor xmm1,xmm9
- pxor xmm15,xmm10
- movdqa xmm7,XMMWORD[32+r11]
- movdqa xmm9,xmm4
- psrlq xmm4,2
- movdqa xmm10,xmm3
- psrlq xmm3,2
- pxor xmm4,xmm6
- pxor xmm3,xmm5
- pand xmm4,xmm8
- pand xmm3,xmm8
- pxor xmm6,xmm4
- psllq xmm4,2
- pxor xmm5,xmm3
- psllq xmm3,2
- pxor xmm4,xmm9
- pxor xmm3,xmm10
- movdqa xmm9,xmm0
- psrlq xmm0,2
- movdqa xmm10,xmm15
- psrlq xmm15,2
- pxor xmm0,xmm2
- pxor xmm15,xmm1
- pand xmm0,xmm8
- pand xmm15,xmm8
- pxor xmm2,xmm0
- psllq xmm0,2
- pxor xmm1,xmm15
- psllq xmm15,2
- pxor xmm0,xmm9
- pxor xmm15,xmm10
- movdqa xmm9,xmm2
- psrlq xmm2,4
- movdqa xmm10,xmm1
- psrlq xmm1,4
- pxor xmm2,xmm6
- pxor xmm1,xmm5
- pand xmm2,xmm7
- pand xmm1,xmm7
- pxor xmm6,xmm2
- psllq xmm2,4
- pxor xmm5,xmm1
- psllq xmm1,4
- pxor xmm2,xmm9
- pxor xmm1,xmm10
- movdqa xmm9,xmm0
- psrlq xmm0,4
- movdqa xmm10,xmm15
- psrlq xmm15,4
- pxor xmm0,xmm4
- pxor xmm15,xmm3
- pand xmm0,xmm7
- pand xmm15,xmm7
- pxor xmm4,xmm0
- psllq xmm0,4
- pxor xmm3,xmm15
- psllq xmm15,4
- pxor xmm0,xmm9
- pxor xmm15,xmm10
- dec r10d
- jmp NEAR $L$enc_sbox
-ALIGN 16
-$L$enc_loop:
- pxor xmm15,XMMWORD[rax]
- pxor xmm0,XMMWORD[16+rax]
- pxor xmm1,XMMWORD[32+rax]
- pxor xmm2,XMMWORD[48+rax]
-DB 102,68,15,56,0,255
-DB 102,15,56,0,199
- pxor xmm3,XMMWORD[64+rax]
- pxor xmm4,XMMWORD[80+rax]
-DB 102,15,56,0,207
-DB 102,15,56,0,215
- pxor xmm5,XMMWORD[96+rax]
- pxor xmm6,XMMWORD[112+rax]
-DB 102,15,56,0,223
-DB 102,15,56,0,231
-DB 102,15,56,0,239
-DB 102,15,56,0,247
- lea rax,[128+rax]
-$L$enc_sbox:
- pxor xmm4,xmm5
- pxor xmm1,xmm0
- pxor xmm2,xmm15
- pxor xmm5,xmm1
- pxor xmm4,xmm15
-
- pxor xmm5,xmm2
- pxor xmm2,xmm6
- pxor xmm6,xmm4
- pxor xmm2,xmm3
- pxor xmm3,xmm4
- pxor xmm2,xmm0
-
- pxor xmm1,xmm6
- pxor xmm0,xmm4
- movdqa xmm10,xmm6
- movdqa xmm9,xmm0
- movdqa xmm8,xmm4
- movdqa xmm12,xmm1
- movdqa xmm11,xmm5
-
- pxor xmm10,xmm3
- pxor xmm9,xmm1
- pxor xmm8,xmm2
- movdqa xmm13,xmm10
- pxor xmm12,xmm3
- movdqa xmm7,xmm9
- pxor xmm11,xmm15
- movdqa xmm14,xmm10
-
- por xmm9,xmm8
- por xmm10,xmm11
- pxor xmm14,xmm7
- pand xmm13,xmm11
- pxor xmm11,xmm8
- pand xmm7,xmm8
- pand xmm14,xmm11
- movdqa xmm11,xmm2
- pxor xmm11,xmm15
- pand xmm12,xmm11
- pxor xmm10,xmm12
- pxor xmm9,xmm12
- movdqa xmm12,xmm6
- movdqa xmm11,xmm4
- pxor xmm12,xmm0
- pxor xmm11,xmm5
- movdqa xmm8,xmm12
- pand xmm12,xmm11
- por xmm8,xmm11
- pxor xmm7,xmm12
- pxor xmm10,xmm14
- pxor xmm9,xmm13
- pxor xmm8,xmm14
- movdqa xmm11,xmm1
- pxor xmm7,xmm13
- movdqa xmm12,xmm3
- pxor xmm8,xmm13
- movdqa xmm13,xmm0
- pand xmm11,xmm2
- movdqa xmm14,xmm6
- pand xmm12,xmm15
- pand xmm13,xmm4
- por xmm14,xmm5
- pxor xmm10,xmm11
- pxor xmm9,xmm12
- pxor xmm8,xmm13
- pxor xmm7,xmm14
-
-
-
-
-
- movdqa xmm11,xmm10
- pand xmm10,xmm8
- pxor xmm11,xmm9
-
- movdqa xmm13,xmm7
- movdqa xmm14,xmm11
- pxor xmm13,xmm10
- pand xmm14,xmm13
-
- movdqa xmm12,xmm8
- pxor xmm14,xmm9
- pxor xmm12,xmm7
-
- pxor xmm10,xmm9
-
- pand xmm12,xmm10
-
- movdqa xmm9,xmm13
- pxor xmm12,xmm7
-
- pxor xmm9,xmm12
- pxor xmm8,xmm12
-
- pand xmm9,xmm7
-
- pxor xmm13,xmm9
- pxor xmm8,xmm9
-
- pand xmm13,xmm14
-
- pxor xmm13,xmm11
- movdqa xmm11,xmm5
- movdqa xmm7,xmm4
- movdqa xmm9,xmm14
- pxor xmm9,xmm13
- pand xmm9,xmm5
- pxor xmm5,xmm4
- pand xmm4,xmm14
- pand xmm5,xmm13
- pxor xmm5,xmm4
- pxor xmm4,xmm9
- pxor xmm11,xmm15
- pxor xmm7,xmm2
- pxor xmm14,xmm12
- pxor xmm13,xmm8
- movdqa xmm10,xmm14
- movdqa xmm9,xmm12
- pxor xmm10,xmm13
- pxor xmm9,xmm8
- pand xmm10,xmm11
- pand xmm9,xmm15
- pxor xmm11,xmm7
- pxor xmm15,xmm2
- pand xmm7,xmm14
- pand xmm2,xmm12
- pand xmm11,xmm13
- pand xmm15,xmm8
- pxor xmm7,xmm11
- pxor xmm15,xmm2
- pxor xmm11,xmm10
- pxor xmm2,xmm9
- pxor xmm5,xmm11
- pxor xmm15,xmm11
- pxor xmm4,xmm7
- pxor xmm2,xmm7
-
- movdqa xmm11,xmm6
- movdqa xmm7,xmm0
- pxor xmm11,xmm3
- pxor xmm7,xmm1
- movdqa xmm10,xmm14
- movdqa xmm9,xmm12
- pxor xmm10,xmm13
- pxor xmm9,xmm8
- pand xmm10,xmm11
- pand xmm9,xmm3
- pxor xmm11,xmm7
- pxor xmm3,xmm1
- pand xmm7,xmm14
- pand xmm1,xmm12
- pand xmm11,xmm13
- pand xmm3,xmm8
- pxor xmm7,xmm11
- pxor xmm3,xmm1
- pxor xmm11,xmm10
- pxor xmm1,xmm9
- pxor xmm14,xmm12
- pxor xmm13,xmm8
- movdqa xmm10,xmm14
- pxor xmm10,xmm13
- pand xmm10,xmm6
- pxor xmm6,xmm0
- pand xmm0,xmm14
- pand xmm6,xmm13
- pxor xmm6,xmm0
- pxor xmm0,xmm10
- pxor xmm6,xmm11
- pxor xmm3,xmm11
- pxor xmm0,xmm7
- pxor xmm1,xmm7
- pxor xmm6,xmm15
- pxor xmm0,xmm5
- pxor xmm3,xmm6
- pxor xmm5,xmm15
- pxor xmm15,xmm0
-
- pxor xmm0,xmm4
- pxor xmm4,xmm1
- pxor xmm1,xmm2
- pxor xmm2,xmm4
- pxor xmm3,xmm4
-
- pxor xmm5,xmm2
- dec r10d
- jl NEAR $L$enc_done
- pshufd xmm7,xmm15,0x93
- pshufd xmm8,xmm0,0x93
- pxor xmm15,xmm7
- pshufd xmm9,xmm3,0x93
- pxor xmm0,xmm8
- pshufd xmm10,xmm5,0x93
- pxor xmm3,xmm9
- pshufd xmm11,xmm2,0x93
- pxor xmm5,xmm10
- pshufd xmm12,xmm6,0x93
- pxor xmm2,xmm11
- pshufd xmm13,xmm1,0x93
- pxor xmm6,xmm12
- pshufd xmm14,xmm4,0x93
- pxor xmm1,xmm13
- pxor xmm4,xmm14
-
- pxor xmm8,xmm15
- pxor xmm7,xmm4
- pxor xmm8,xmm4
- pshufd xmm15,xmm15,0x4E
- pxor xmm9,xmm0
- pshufd xmm0,xmm0,0x4E
- pxor xmm12,xmm2
- pxor xmm15,xmm7
- pxor xmm13,xmm6
- pxor xmm0,xmm8
- pxor xmm11,xmm5
- pshufd xmm7,xmm2,0x4E
- pxor xmm14,xmm1
- pshufd xmm8,xmm6,0x4E
- pxor xmm10,xmm3
- pshufd xmm2,xmm5,0x4E
- pxor xmm10,xmm4
- pshufd xmm6,xmm4,0x4E
- pxor xmm11,xmm4
- pshufd xmm5,xmm1,0x4E
- pxor xmm7,xmm11
- pshufd xmm1,xmm3,0x4E
- pxor xmm8,xmm12
- pxor xmm2,xmm10
- pxor xmm6,xmm14
- pxor xmm5,xmm13
- movdqa xmm3,xmm7
- pxor xmm1,xmm9
- movdqa xmm4,xmm8
- movdqa xmm7,XMMWORD[48+r11]
- jnz NEAR $L$enc_loop
- movdqa xmm7,XMMWORD[64+r11]
- jmp NEAR $L$enc_loop
-ALIGN 16
-$L$enc_done:
- movdqa xmm7,XMMWORD[r11]
- movdqa xmm8,XMMWORD[16+r11]
- movdqa xmm9,xmm1
- psrlq xmm1,1
- movdqa xmm10,xmm2
- psrlq xmm2,1
- pxor xmm1,xmm4
- pxor xmm2,xmm6
- pand xmm1,xmm7
- pand xmm2,xmm7
- pxor xmm4,xmm1
- psllq xmm1,1
- pxor xmm6,xmm2
- psllq xmm2,1
- pxor xmm1,xmm9
- pxor xmm2,xmm10
- movdqa xmm9,xmm3
- psrlq xmm3,1
- movdqa xmm10,xmm15
- psrlq xmm15,1
- pxor xmm3,xmm5
- pxor xmm15,xmm0
- pand xmm3,xmm7
- pand xmm15,xmm7
- pxor xmm5,xmm3
- psllq xmm3,1
- pxor xmm0,xmm15
- psllq xmm15,1
- pxor xmm3,xmm9
- pxor xmm15,xmm10
- movdqa xmm7,XMMWORD[32+r11]
- movdqa xmm9,xmm6
- psrlq xmm6,2
- movdqa xmm10,xmm2
- psrlq xmm2,2
- pxor xmm6,xmm4
- pxor xmm2,xmm1
- pand xmm6,xmm8
- pand xmm2,xmm8
- pxor xmm4,xmm6
- psllq xmm6,2
- pxor xmm1,xmm2
- psllq xmm2,2
- pxor xmm6,xmm9
- pxor xmm2,xmm10
- movdqa xmm9,xmm0
- psrlq xmm0,2
- movdqa xmm10,xmm15
- psrlq xmm15,2
- pxor xmm0,xmm5
- pxor xmm15,xmm3
- pand xmm0,xmm8
- pand xmm15,xmm8
- pxor xmm5,xmm0
- psllq xmm0,2
- pxor xmm3,xmm15
- psllq xmm15,2
- pxor xmm0,xmm9
- pxor xmm15,xmm10
- movdqa xmm9,xmm5
- psrlq xmm5,4
- movdqa xmm10,xmm3
- psrlq xmm3,4
- pxor xmm5,xmm4
- pxor xmm3,xmm1
- pand xmm5,xmm7
- pand xmm3,xmm7
- pxor xmm4,xmm5
- psllq xmm5,4
- pxor xmm1,xmm3
- psllq xmm3,4
- pxor xmm5,xmm9
- pxor xmm3,xmm10
- movdqa xmm9,xmm0
- psrlq xmm0,4
- movdqa xmm10,xmm15
- psrlq xmm15,4
- pxor xmm0,xmm6
- pxor xmm15,xmm2
- pand xmm0,xmm7
- pand xmm15,xmm7
- pxor xmm6,xmm0
- psllq xmm0,4
- pxor xmm2,xmm15
- psllq xmm15,4
- pxor xmm0,xmm9
- pxor xmm15,xmm10
- movdqa xmm7,XMMWORD[rax]
- pxor xmm3,xmm7
- pxor xmm5,xmm7
- pxor xmm2,xmm7
- pxor xmm6,xmm7
- pxor xmm1,xmm7
- pxor xmm4,xmm7
- pxor xmm15,xmm7
- pxor xmm0,xmm7
- DB 0F3h,0C3h ;repret
-
-
-
-
-ALIGN 64
-_bsaes_decrypt8:
-
- lea r11,[$L$BS0]
-
- movdqa xmm8,XMMWORD[rax]
- lea rax,[16+rax]
- movdqa xmm7,XMMWORD[((-48))+r11]
- pxor xmm15,xmm8
- pxor xmm0,xmm8
- pxor xmm1,xmm8
- pxor xmm2,xmm8
-DB 102,68,15,56,0,255
-DB 102,15,56,0,199
- pxor xmm3,xmm8
- pxor xmm4,xmm8
-DB 102,15,56,0,207
-DB 102,15,56,0,215
- pxor xmm5,xmm8
- pxor xmm6,xmm8
-DB 102,15,56,0,223
-DB 102,15,56,0,231
-DB 102,15,56,0,239
-DB 102,15,56,0,247
- movdqa xmm7,XMMWORD[r11]
- movdqa xmm8,XMMWORD[16+r11]
- movdqa xmm9,xmm5
- psrlq xmm5,1
- movdqa xmm10,xmm3
- psrlq xmm3,1
- pxor xmm5,xmm6
- pxor xmm3,xmm4
- pand xmm5,xmm7
- pand xmm3,xmm7
- pxor xmm6,xmm5
- psllq xmm5,1
- pxor xmm4,xmm3
- psllq xmm3,1
- pxor xmm5,xmm9
- pxor xmm3,xmm10
- movdqa xmm9,xmm1
- psrlq xmm1,1
- movdqa xmm10,xmm15
- psrlq xmm15,1
- pxor xmm1,xmm2
- pxor xmm15,xmm0
- pand xmm1,xmm7
- pand xmm15,xmm7
- pxor xmm2,xmm1
- psllq xmm1,1
- pxor xmm0,xmm15
- psllq xmm15,1
- pxor xmm1,xmm9
- pxor xmm15,xmm10
- movdqa xmm7,XMMWORD[32+r11]
- movdqa xmm9,xmm4
- psrlq xmm4,2
- movdqa xmm10,xmm3
- psrlq xmm3,2
- pxor xmm4,xmm6
- pxor xmm3,xmm5
- pand xmm4,xmm8
- pand xmm3,xmm8
- pxor xmm6,xmm4
- psllq xmm4,2
- pxor xmm5,xmm3
- psllq xmm3,2
- pxor xmm4,xmm9
- pxor xmm3,xmm10
- movdqa xmm9,xmm0
- psrlq xmm0,2
- movdqa xmm10,xmm15
- psrlq xmm15,2
- pxor xmm0,xmm2
- pxor xmm15,xmm1
- pand xmm0,xmm8
- pand xmm15,xmm8
- pxor xmm2,xmm0
- psllq xmm0,2
- pxor xmm1,xmm15
- psllq xmm15,2
- pxor xmm0,xmm9
- pxor xmm15,xmm10
- movdqa xmm9,xmm2
- psrlq xmm2,4
- movdqa xmm10,xmm1
- psrlq xmm1,4
- pxor xmm2,xmm6
- pxor xmm1,xmm5
- pand xmm2,xmm7
- pand xmm1,xmm7
- pxor xmm6,xmm2
- psllq xmm2,4
- pxor xmm5,xmm1
- psllq xmm1,4
- pxor xmm2,xmm9
- pxor xmm1,xmm10
- movdqa xmm9,xmm0
- psrlq xmm0,4
- movdqa xmm10,xmm15
- psrlq xmm15,4
- pxor xmm0,xmm4
- pxor xmm15,xmm3
- pand xmm0,xmm7
- pand xmm15,xmm7
- pxor xmm4,xmm0
- psllq xmm0,4
- pxor xmm3,xmm15
- psllq xmm15,4
- pxor xmm0,xmm9
- pxor xmm15,xmm10
- dec r10d
- jmp NEAR $L$dec_sbox
-ALIGN 16
-$L$dec_loop:
- pxor xmm15,XMMWORD[rax]
- pxor xmm0,XMMWORD[16+rax]
- pxor xmm1,XMMWORD[32+rax]
- pxor xmm2,XMMWORD[48+rax]
-DB 102,68,15,56,0,255
-DB 102,15,56,0,199
- pxor xmm3,XMMWORD[64+rax]
- pxor xmm4,XMMWORD[80+rax]
-DB 102,15,56,0,207
-DB 102,15,56,0,215
- pxor xmm5,XMMWORD[96+rax]
- pxor xmm6,XMMWORD[112+rax]
-DB 102,15,56,0,223
-DB 102,15,56,0,231
-DB 102,15,56,0,239
-DB 102,15,56,0,247
- lea rax,[128+rax]
-$L$dec_sbox:
- pxor xmm2,xmm3
-
- pxor xmm3,xmm6
- pxor xmm1,xmm6
- pxor xmm5,xmm3
- pxor xmm6,xmm5
- pxor xmm0,xmm6
-
- pxor xmm15,xmm0
- pxor xmm1,xmm4
- pxor xmm2,xmm15
- pxor xmm4,xmm15
- pxor xmm0,xmm2
- movdqa xmm10,xmm2
- movdqa xmm9,xmm6
- movdqa xmm8,xmm0
- movdqa xmm12,xmm3
- movdqa xmm11,xmm4
-
- pxor xmm10,xmm15
- pxor xmm9,xmm3
- pxor xmm8,xmm5
- movdqa xmm13,xmm10
- pxor xmm12,xmm15
- movdqa xmm7,xmm9
- pxor xmm11,xmm1
- movdqa xmm14,xmm10
-
- por xmm9,xmm8
- por xmm10,xmm11
- pxor xmm14,xmm7
- pand xmm13,xmm11
- pxor xmm11,xmm8
- pand xmm7,xmm8
- pand xmm14,xmm11
- movdqa xmm11,xmm5
- pxor xmm11,xmm1
- pand xmm12,xmm11
- pxor xmm10,xmm12
- pxor xmm9,xmm12
- movdqa xmm12,xmm2
- movdqa xmm11,xmm0
- pxor xmm12,xmm6
- pxor xmm11,xmm4
- movdqa xmm8,xmm12
- pand xmm12,xmm11
- por xmm8,xmm11
- pxor xmm7,xmm12
- pxor xmm10,xmm14
- pxor xmm9,xmm13
- pxor xmm8,xmm14
- movdqa xmm11,xmm3
- pxor xmm7,xmm13
- movdqa xmm12,xmm15
- pxor xmm8,xmm13
- movdqa xmm13,xmm6
- pand xmm11,xmm5
- movdqa xmm14,xmm2
- pand xmm12,xmm1
- pand xmm13,xmm0
- por xmm14,xmm4
- pxor xmm10,xmm11
- pxor xmm9,xmm12
- pxor xmm8,xmm13
- pxor xmm7,xmm14
-
-
-
-
-
- movdqa xmm11,xmm10
- pand xmm10,xmm8
- pxor xmm11,xmm9
-
- movdqa xmm13,xmm7
- movdqa xmm14,xmm11
- pxor xmm13,xmm10
- pand xmm14,xmm13
-
- movdqa xmm12,xmm8
- pxor xmm14,xmm9
- pxor xmm12,xmm7
-
- pxor xmm10,xmm9
-
- pand xmm12,xmm10
-
- movdqa xmm9,xmm13
- pxor xmm12,xmm7
-
- pxor xmm9,xmm12
- pxor xmm8,xmm12
-
- pand xmm9,xmm7
-
- pxor xmm13,xmm9
- pxor xmm8,xmm9
-
- pand xmm13,xmm14
-
- pxor xmm13,xmm11
- movdqa xmm11,xmm4
- movdqa xmm7,xmm0
- movdqa xmm9,xmm14
- pxor xmm9,xmm13
- pand xmm9,xmm4
- pxor xmm4,xmm0
- pand xmm0,xmm14
- pand xmm4,xmm13
- pxor xmm4,xmm0
- pxor xmm0,xmm9
- pxor xmm11,xmm1
- pxor xmm7,xmm5
- pxor xmm14,xmm12
- pxor xmm13,xmm8
- movdqa xmm10,xmm14
- movdqa xmm9,xmm12
- pxor xmm10,xmm13
- pxor xmm9,xmm8
- pand xmm10,xmm11
- pand xmm9,xmm1
- pxor xmm11,xmm7
- pxor xmm1,xmm5
- pand xmm7,xmm14
- pand xmm5,xmm12
- pand xmm11,xmm13
- pand xmm1,xmm8
- pxor xmm7,xmm11
- pxor xmm1,xmm5
- pxor xmm11,xmm10
- pxor xmm5,xmm9
- pxor xmm4,xmm11
- pxor xmm1,xmm11
- pxor xmm0,xmm7
- pxor xmm5,xmm7
-
- movdqa xmm11,xmm2
- movdqa xmm7,xmm6
- pxor xmm11,xmm15
- pxor xmm7,xmm3
- movdqa xmm10,xmm14
- movdqa xmm9,xmm12
- pxor xmm10,xmm13
- pxor xmm9,xmm8
- pand xmm10,xmm11
- pand xmm9,xmm15
- pxor xmm11,xmm7
- pxor xmm15,xmm3
- pand xmm7,xmm14
- pand xmm3,xmm12
- pand xmm11,xmm13
- pand xmm15,xmm8
- pxor xmm7,xmm11
- pxor xmm15,xmm3
- pxor xmm11,xmm10
- pxor xmm3,xmm9
- pxor xmm14,xmm12
- pxor xmm13,xmm8
- movdqa xmm10,xmm14
- pxor xmm10,xmm13
- pand xmm10,xmm2
- pxor xmm2,xmm6
- pand xmm6,xmm14
- pand xmm2,xmm13
- pxor xmm2,xmm6
- pxor xmm6,xmm10
- pxor xmm2,xmm11
- pxor xmm15,xmm11
- pxor xmm6,xmm7
- pxor xmm3,xmm7
- pxor xmm0,xmm6
- pxor xmm5,xmm4
-
- pxor xmm3,xmm0
- pxor xmm1,xmm6
- pxor xmm4,xmm6
- pxor xmm3,xmm1
- pxor xmm6,xmm15
- pxor xmm3,xmm4
- pxor xmm2,xmm5
- pxor xmm5,xmm0
- pxor xmm2,xmm3
-
- pxor xmm3,xmm15
- pxor xmm6,xmm2
- dec r10d
- jl NEAR $L$dec_done
-
- pshufd xmm7,xmm15,0x4E
- pshufd xmm13,xmm2,0x4E
- pxor xmm7,xmm15
- pshufd xmm14,xmm4,0x4E
- pxor xmm13,xmm2
- pshufd xmm8,xmm0,0x4E
- pxor xmm14,xmm4
- pshufd xmm9,xmm5,0x4E
- pxor xmm8,xmm0
- pshufd xmm10,xmm3,0x4E
- pxor xmm9,xmm5
- pxor xmm15,xmm13
- pxor xmm0,xmm13
- pshufd xmm11,xmm1,0x4E
- pxor xmm10,xmm3
- pxor xmm5,xmm7
- pxor xmm3,xmm8
- pshufd xmm12,xmm6,0x4E
- pxor xmm11,xmm1
- pxor xmm0,xmm14
- pxor xmm1,xmm9
- pxor xmm12,xmm6
-
- pxor xmm5,xmm14
- pxor xmm3,xmm13
- pxor xmm1,xmm13
- pxor xmm6,xmm10
- pxor xmm2,xmm11
- pxor xmm1,xmm14
- pxor xmm6,xmm14
- pxor xmm4,xmm12
- pshufd xmm7,xmm15,0x93
- pshufd xmm8,xmm0,0x93
- pxor xmm15,xmm7
- pshufd xmm9,xmm5,0x93
- pxor xmm0,xmm8
- pshufd xmm10,xmm3,0x93
- pxor xmm5,xmm9
- pshufd xmm11,xmm1,0x93
- pxor xmm3,xmm10
- pshufd xmm12,xmm6,0x93
- pxor xmm1,xmm11
- pshufd xmm13,xmm2,0x93
- pxor xmm6,xmm12
- pshufd xmm14,xmm4,0x93
- pxor xmm2,xmm13
- pxor xmm4,xmm14
-
- pxor xmm8,xmm15
- pxor xmm7,xmm4
- pxor xmm8,xmm4
- pshufd xmm15,xmm15,0x4E
- pxor xmm9,xmm0
- pshufd xmm0,xmm0,0x4E
- pxor xmm12,xmm1
- pxor xmm15,xmm7
- pxor xmm13,xmm6
- pxor xmm0,xmm8
- pxor xmm11,xmm3
- pshufd xmm7,xmm1,0x4E
- pxor xmm14,xmm2
- pshufd xmm8,xmm6,0x4E
- pxor xmm10,xmm5
- pshufd xmm1,xmm3,0x4E
- pxor xmm10,xmm4
- pshufd xmm6,xmm4,0x4E
- pxor xmm11,xmm4
- pshufd xmm3,xmm2,0x4E
- pxor xmm7,xmm11
- pshufd xmm2,xmm5,0x4E
- pxor xmm8,xmm12
- pxor xmm10,xmm1
- pxor xmm6,xmm14
- pxor xmm13,xmm3
- movdqa xmm3,xmm7
- pxor xmm2,xmm9
- movdqa xmm5,xmm13
- movdqa xmm4,xmm8
- movdqa xmm1,xmm2
- movdqa xmm2,xmm10
- movdqa xmm7,XMMWORD[((-16))+r11]
- jnz NEAR $L$dec_loop
- movdqa xmm7,XMMWORD[((-32))+r11]
- jmp NEAR $L$dec_loop
-ALIGN 16
-$L$dec_done:
- movdqa xmm7,XMMWORD[r11]
- movdqa xmm8,XMMWORD[16+r11]
- movdqa xmm9,xmm2
- psrlq xmm2,1
- movdqa xmm10,xmm1
- psrlq xmm1,1
- pxor xmm2,xmm4
- pxor xmm1,xmm6
- pand xmm2,xmm7
- pand xmm1,xmm7
- pxor xmm4,xmm2
- psllq xmm2,1
- pxor xmm6,xmm1
- psllq xmm1,1
- pxor xmm2,xmm9
- pxor xmm1,xmm10
- movdqa xmm9,xmm5
- psrlq xmm5,1
- movdqa xmm10,xmm15
- psrlq xmm15,1
- pxor xmm5,xmm3
- pxor xmm15,xmm0
- pand xmm5,xmm7
- pand xmm15,xmm7
- pxor xmm3,xmm5
- psllq xmm5,1
- pxor xmm0,xmm15
- psllq xmm15,1
- pxor xmm5,xmm9
- pxor xmm15,xmm10
- movdqa xmm7,XMMWORD[32+r11]
- movdqa xmm9,xmm6
- psrlq xmm6,2
- movdqa xmm10,xmm1
- psrlq xmm1,2
- pxor xmm6,xmm4
- pxor xmm1,xmm2
- pand xmm6,xmm8
- pand xmm1,xmm8
- pxor xmm4,xmm6
- psllq xmm6,2
- pxor xmm2,xmm1
- psllq xmm1,2
- pxor xmm6,xmm9
- pxor xmm1,xmm10
- movdqa xmm9,xmm0
- psrlq xmm0,2
- movdqa xmm10,xmm15
- psrlq xmm15,2
- pxor xmm0,xmm3
- pxor xmm15,xmm5
- pand xmm0,xmm8
- pand xmm15,xmm8
- pxor xmm3,xmm0
- psllq xmm0,2
- pxor xmm5,xmm15
- psllq xmm15,2
- pxor xmm0,xmm9
- pxor xmm15,xmm10
- movdqa xmm9,xmm3
- psrlq xmm3,4
- movdqa xmm10,xmm5
- psrlq xmm5,4
- pxor xmm3,xmm4
- pxor xmm5,xmm2
- pand xmm3,xmm7
- pand xmm5,xmm7
- pxor xmm4,xmm3
- psllq xmm3,4
- pxor xmm2,xmm5
- psllq xmm5,4
- pxor xmm3,xmm9
- pxor xmm5,xmm10
- movdqa xmm9,xmm0
- psrlq xmm0,4
- movdqa xmm10,xmm15
- psrlq xmm15,4
- pxor xmm0,xmm6
- pxor xmm15,xmm1
- pand xmm0,xmm7
- pand xmm15,xmm7
- pxor xmm6,xmm0
- psllq xmm0,4
- pxor xmm1,xmm15
- psllq xmm15,4
- pxor xmm0,xmm9
- pxor xmm15,xmm10
- movdqa xmm7,XMMWORD[rax]
- pxor xmm5,xmm7
- pxor xmm3,xmm7
- pxor xmm1,xmm7
- pxor xmm6,xmm7
- pxor xmm2,xmm7
- pxor xmm4,xmm7
- pxor xmm15,xmm7
- pxor xmm0,xmm7
- DB 0F3h,0C3h ;repret
-
-
-
-ALIGN 16
-_bsaes_key_convert:
-
- lea r11,[$L$masks]
- movdqu xmm7,XMMWORD[rcx]
- lea rcx,[16+rcx]
- movdqa xmm0,XMMWORD[r11]
- movdqa xmm1,XMMWORD[16+r11]
- movdqa xmm2,XMMWORD[32+r11]
- movdqa xmm3,XMMWORD[48+r11]
- movdqa xmm4,XMMWORD[64+r11]
- pcmpeqd xmm5,xmm5
-
- movdqu xmm6,XMMWORD[rcx]
- movdqa XMMWORD[rax],xmm7
- lea rax,[16+rax]
- dec r10d
- jmp NEAR $L$key_loop
-ALIGN 16
-$L$key_loop:
-DB 102,15,56,0,244
-
- movdqa xmm8,xmm0
- movdqa xmm9,xmm1
-
- pand xmm8,xmm6
- pand xmm9,xmm6
- movdqa xmm10,xmm2
- pcmpeqb xmm8,xmm0
- psllq xmm0,4
- movdqa xmm11,xmm3
- pcmpeqb xmm9,xmm1
- psllq xmm1,4
-
- pand xmm10,xmm6
- pand xmm11,xmm6
- movdqa xmm12,xmm0
- pcmpeqb xmm10,xmm2
- psllq xmm2,4
- movdqa xmm13,xmm1
- pcmpeqb xmm11,xmm3
- psllq xmm3,4
-
- movdqa xmm14,xmm2
- movdqa xmm15,xmm3
- pxor xmm8,xmm5
- pxor xmm9,xmm5
-
- pand xmm12,xmm6
- pand xmm13,xmm6
- movdqa XMMWORD[rax],xmm8
- pcmpeqb xmm12,xmm0
- psrlq xmm0,4
- movdqa XMMWORD[16+rax],xmm9
- pcmpeqb xmm13,xmm1
- psrlq xmm1,4
- lea rcx,[16+rcx]
-
- pand xmm14,xmm6
- pand xmm15,xmm6
- movdqa XMMWORD[32+rax],xmm10
- pcmpeqb xmm14,xmm2
- psrlq xmm2,4
- movdqa XMMWORD[48+rax],xmm11
- pcmpeqb xmm15,xmm3
- psrlq xmm3,4
- movdqu xmm6,XMMWORD[rcx]
-
- pxor xmm13,xmm5
- pxor xmm14,xmm5
- movdqa XMMWORD[64+rax],xmm12
- movdqa XMMWORD[80+rax],xmm13
- movdqa XMMWORD[96+rax],xmm14
- movdqa XMMWORD[112+rax],xmm15
- lea rax,[128+rax]
- dec r10d
- jnz NEAR $L$key_loop
-
- movdqa xmm7,XMMWORD[80+r11]
-
- DB 0F3h,0C3h ;repret
-
-
-global bsaes_cbc_encrypt
-
-ALIGN 16
-bsaes_cbc_encrypt:
-
-
-
-
- mov rax,rsp
-$L$cbc_dec_prologue:
- push rbp
-
- push rbx
-
- push r12
-
- push r13
-
- push r14
-
- push r15
-
- lea rsp,[((-72))+rsp]
-
- mov r10,QWORD[160+rsp]
- lea rsp,[((-160))+rsp]
- movaps XMMWORD[64+rsp],xmm6
- movaps XMMWORD[80+rsp],xmm7
- movaps XMMWORD[96+rsp],xmm8
- movaps XMMWORD[112+rsp],xmm9
- movaps XMMWORD[128+rsp],xmm10
- movaps XMMWORD[144+rsp],xmm11
- movaps XMMWORD[160+rsp],xmm12
- movaps XMMWORD[176+rsp],xmm13
- movaps XMMWORD[192+rsp],xmm14
- movaps XMMWORD[208+rsp],xmm15
-$L$cbc_dec_body:
- mov rbp,rsp
-
- mov eax,DWORD[240+r9]
- mov r12,rcx
- mov r13,rdx
- mov r14,r8
- mov r15,r9
- mov rbx,r10
- shr r14,4
-
- mov edx,eax
- shl rax,7
- sub rax,96
- sub rsp,rax
-
- mov rax,rsp
- mov rcx,r15
- mov r10d,edx
- call _bsaes_key_convert
- pxor xmm7,XMMWORD[rsp]
- movdqa XMMWORD[rax],xmm6
- movdqa XMMWORD[rsp],xmm7
-
- movdqu xmm14,XMMWORD[rbx]
- sub r14,8
- jc NEAR $L$cbc_dec_loop_done
-
-$L$cbc_dec_loop:
- movdqu xmm15,XMMWORD[r12]
- movdqu xmm0,XMMWORD[16+r12]
- movdqu xmm1,XMMWORD[32+r12]
- movdqu xmm2,XMMWORD[48+r12]
- movdqu xmm3,XMMWORD[64+r12]
- movdqu xmm4,XMMWORD[80+r12]
- mov rax,rsp
- movdqu xmm5,XMMWORD[96+r12]
- mov r10d,edx
- movdqu xmm6,XMMWORD[112+r12]
- movdqa XMMWORD[32+rbp],xmm14
-
- call _bsaes_decrypt8
-
- pxor xmm15,XMMWORD[32+rbp]
- movdqu xmm7,XMMWORD[r12]
- movdqu xmm8,XMMWORD[16+r12]
- pxor xmm0,xmm7
- movdqu xmm9,XMMWORD[32+r12]
- pxor xmm5,xmm8
- movdqu xmm10,XMMWORD[48+r12]
- pxor xmm3,xmm9
- movdqu xmm11,XMMWORD[64+r12]
- pxor xmm1,xmm10
- movdqu xmm12,XMMWORD[80+r12]
- pxor xmm6,xmm11
- movdqu xmm13,XMMWORD[96+r12]
- pxor xmm2,xmm12
- movdqu xmm14,XMMWORD[112+r12]
- pxor xmm4,xmm13
- movdqu XMMWORD[r13],xmm15
- lea r12,[128+r12]
- movdqu XMMWORD[16+r13],xmm0
- movdqu XMMWORD[32+r13],xmm5
- movdqu XMMWORD[48+r13],xmm3
- movdqu XMMWORD[64+r13],xmm1
- movdqu XMMWORD[80+r13],xmm6
- movdqu XMMWORD[96+r13],xmm2
- movdqu XMMWORD[112+r13],xmm4
- lea r13,[128+r13]
- sub r14,8
- jnc NEAR $L$cbc_dec_loop
-
-$L$cbc_dec_loop_done:
- add r14,8
- jz NEAR $L$cbc_dec_done
-
- movdqu xmm15,XMMWORD[r12]
- mov rax,rsp
- mov r10d,edx
- cmp r14,2
- jb NEAR $L$cbc_dec_one
- movdqu xmm0,XMMWORD[16+r12]
- je NEAR $L$cbc_dec_two
- movdqu xmm1,XMMWORD[32+r12]
- cmp r14,4
- jb NEAR $L$cbc_dec_three
- movdqu xmm2,XMMWORD[48+r12]
- je NEAR $L$cbc_dec_four
- movdqu xmm3,XMMWORD[64+r12]
- cmp r14,6
- jb NEAR $L$cbc_dec_five
- movdqu xmm4,XMMWORD[80+r12]
- je NEAR $L$cbc_dec_six
- movdqu xmm5,XMMWORD[96+r12]
- movdqa XMMWORD[32+rbp],xmm14
- call _bsaes_decrypt8
- pxor xmm15,XMMWORD[32+rbp]
- movdqu xmm7,XMMWORD[r12]
- movdqu xmm8,XMMWORD[16+r12]
- pxor xmm0,xmm7
- movdqu xmm9,XMMWORD[32+r12]
- pxor xmm5,xmm8
- movdqu xmm10,XMMWORD[48+r12]
- pxor xmm3,xmm9
- movdqu xmm11,XMMWORD[64+r12]
- pxor xmm1,xmm10
- movdqu xmm12,XMMWORD[80+r12]
- pxor xmm6,xmm11
- movdqu xmm14,XMMWORD[96+r12]
- pxor xmm2,xmm12
- movdqu XMMWORD[r13],xmm15
- movdqu XMMWORD[16+r13],xmm0
- movdqu XMMWORD[32+r13],xmm5
- movdqu XMMWORD[48+r13],xmm3
- movdqu XMMWORD[64+r13],xmm1
- movdqu XMMWORD[80+r13],xmm6
- movdqu XMMWORD[96+r13],xmm2
- jmp NEAR $L$cbc_dec_done
-ALIGN 16
-$L$cbc_dec_six:
- movdqa XMMWORD[32+rbp],xmm14
- call _bsaes_decrypt8
- pxor xmm15,XMMWORD[32+rbp]
- movdqu xmm7,XMMWORD[r12]
- movdqu xmm8,XMMWORD[16+r12]
- pxor xmm0,xmm7
- movdqu xmm9,XMMWORD[32+r12]
- pxor xmm5,xmm8
- movdqu xmm10,XMMWORD[48+r12]
- pxor xmm3,xmm9
- movdqu xmm11,XMMWORD[64+r12]
- pxor xmm1,xmm10
- movdqu xmm14,XMMWORD[80+r12]
- pxor xmm6,xmm11
- movdqu XMMWORD[r13],xmm15
- movdqu XMMWORD[16+r13],xmm0
- movdqu XMMWORD[32+r13],xmm5
- movdqu XMMWORD[48+r13],xmm3
- movdqu XMMWORD[64+r13],xmm1
- movdqu XMMWORD[80+r13],xmm6
- jmp NEAR $L$cbc_dec_done
-ALIGN 16
-$L$cbc_dec_five:
- movdqa XMMWORD[32+rbp],xmm14
- call _bsaes_decrypt8
- pxor xmm15,XMMWORD[32+rbp]
- movdqu xmm7,XMMWORD[r12]
- movdqu xmm8,XMMWORD[16+r12]
- pxor xmm0,xmm7
- movdqu xmm9,XMMWORD[32+r12]
- pxor xmm5,xmm8
- movdqu xmm10,XMMWORD[48+r12]
- pxor xmm3,xmm9
- movdqu xmm14,XMMWORD[64+r12]
- pxor xmm1,xmm10
- movdqu XMMWORD[r13],xmm15
- movdqu XMMWORD[16+r13],xmm0
- movdqu XMMWORD[32+r13],xmm5
- movdqu XMMWORD[48+r13],xmm3
- movdqu XMMWORD[64+r13],xmm1
- jmp NEAR $L$cbc_dec_done
-ALIGN 16
-$L$cbc_dec_four:
- movdqa XMMWORD[32+rbp],xmm14
- call _bsaes_decrypt8
- pxor xmm15,XMMWORD[32+rbp]
- movdqu xmm7,XMMWORD[r12]
- movdqu xmm8,XMMWORD[16+r12]
- pxor xmm0,xmm7
- movdqu xmm9,XMMWORD[32+r12]
- pxor xmm5,xmm8
- movdqu xmm14,XMMWORD[48+r12]
- pxor xmm3,xmm9
- movdqu XMMWORD[r13],xmm15
- movdqu XMMWORD[16+r13],xmm0
- movdqu XMMWORD[32+r13],xmm5
- movdqu XMMWORD[48+r13],xmm3
- jmp NEAR $L$cbc_dec_done
-ALIGN 16
-$L$cbc_dec_three:
- movdqa XMMWORD[32+rbp],xmm14
- call _bsaes_decrypt8
- pxor xmm15,XMMWORD[32+rbp]
- movdqu xmm7,XMMWORD[r12]
- movdqu xmm8,XMMWORD[16+r12]
- pxor xmm0,xmm7
- movdqu xmm14,XMMWORD[32+r12]
- pxor xmm5,xmm8
- movdqu XMMWORD[r13],xmm15
- movdqu XMMWORD[16+r13],xmm0
- movdqu XMMWORD[32+r13],xmm5
- jmp NEAR $L$cbc_dec_done
-ALIGN 16
-$L$cbc_dec_two:
- movdqa XMMWORD[32+rbp],xmm14
- call _bsaes_decrypt8
- pxor xmm15,XMMWORD[32+rbp]
- movdqu xmm7,XMMWORD[r12]
- movdqu xmm14,XMMWORD[16+r12]
- pxor xmm0,xmm7
- movdqu XMMWORD[r13],xmm15
- movdqu XMMWORD[16+r13],xmm0
- jmp NEAR $L$cbc_dec_done
-ALIGN 16
-$L$cbc_dec_one:
- movdqa XMMWORD[32+rbp],xmm14
- call _bsaes_decrypt8
- pxor xmm15,XMMWORD[32+rbp]
- movdqu xmm14,XMMWORD[r12]
- movdqu XMMWORD[r13],xmm15
- jmp NEAR $L$cbc_dec_done
-
-$L$cbc_dec_done:
- movdqu XMMWORD[rbx],xmm14
- lea rax,[rsp]
- pxor xmm0,xmm0
-$L$cbc_dec_bzero:
- movdqa XMMWORD[rax],xmm0
- movdqa XMMWORD[16+rax],xmm0
- lea rax,[32+rax]
- cmp rbp,rax
- ja NEAR $L$cbc_dec_bzero
-
- lea rax,[120+rbp]
-
- movaps xmm6,XMMWORD[64+rbp]
- movaps xmm7,XMMWORD[80+rbp]
- movaps xmm8,XMMWORD[96+rbp]
- movaps xmm9,XMMWORD[112+rbp]
- movaps xmm10,XMMWORD[128+rbp]
- movaps xmm11,XMMWORD[144+rbp]
- movaps xmm12,XMMWORD[160+rbp]
- movaps xmm13,XMMWORD[176+rbp]
- movaps xmm14,XMMWORD[192+rbp]
- movaps xmm15,XMMWORD[208+rbp]
- lea rax,[160+rax]
-$L$cbc_dec_tail:
- mov r15,QWORD[((-48))+rax]
-
- mov r14,QWORD[((-40))+rax]
-
- mov r13,QWORD[((-32))+rax]
-
- mov r12,QWORD[((-24))+rax]
-
- mov rbx,QWORD[((-16))+rax]
-
- mov rbp,QWORD[((-8))+rax]
-
- lea rsp,[rax]
-
-$L$cbc_dec_epilogue:
- DB 0F3h,0C3h ;repret
-
-
-
-global bsaes_ctr32_encrypt_blocks
-
-ALIGN 16
-bsaes_ctr32_encrypt_blocks:
-
-%ifndef NDEBUG
-%ifndef BORINGSSL_FIPS
-EXTERN BORINGSSL_function_hit
- mov BYTE[((BORINGSSL_function_hit+6))],1
-%endif
-%endif
- mov rax,rsp
-$L$ctr_enc_prologue:
- push rbp
-
- push rbx
-
- push r12
-
- push r13
-
- push r14
-
- push r15
-
- lea rsp,[((-72))+rsp]
-
- mov r10,QWORD[160+rsp]
- lea rsp,[((-160))+rsp]
- movaps XMMWORD[64+rsp],xmm6
- movaps XMMWORD[80+rsp],xmm7
- movaps XMMWORD[96+rsp],xmm8
- movaps XMMWORD[112+rsp],xmm9
- movaps XMMWORD[128+rsp],xmm10
- movaps XMMWORD[144+rsp],xmm11
- movaps XMMWORD[160+rsp],xmm12
- movaps XMMWORD[176+rsp],xmm13
- movaps XMMWORD[192+rsp],xmm14
- movaps XMMWORD[208+rsp],xmm15
-$L$ctr_enc_body:
- mov rbp,rsp
-
- movdqu xmm0,XMMWORD[r10]
- mov eax,DWORD[240+r9]
- mov r12,rcx
- mov r13,rdx
- mov r14,r8
- mov r15,r9
- movdqa XMMWORD[32+rbp],xmm0
-
-
-
- mov ebx,eax
- shl rax,7
- sub rax,96
- sub rsp,rax
-
- mov rax,rsp
- mov rcx,r15
- mov r10d,ebx
- call _bsaes_key_convert
- pxor xmm7,xmm6
- movdqa XMMWORD[rax],xmm7
-
- movdqa xmm8,XMMWORD[rsp]
- lea r11,[$L$ADD1]
- movdqa xmm15,XMMWORD[32+rbp]
- movdqa xmm7,XMMWORD[((-32))+r11]
-DB 102,68,15,56,0,199
-DB 102,68,15,56,0,255
- movdqa XMMWORD[rsp],xmm8
- jmp NEAR $L$ctr_enc_loop
-ALIGN 16
-$L$ctr_enc_loop:
- movdqa XMMWORD[32+rbp],xmm15
- movdqa xmm0,xmm15
- movdqa xmm1,xmm15
- paddd xmm0,XMMWORD[r11]
- movdqa xmm2,xmm15
- paddd xmm1,XMMWORD[16+r11]
- movdqa xmm3,xmm15
- paddd xmm2,XMMWORD[32+r11]
- movdqa xmm4,xmm15
- paddd xmm3,XMMWORD[48+r11]
- movdqa xmm5,xmm15
- paddd xmm4,XMMWORD[64+r11]
- movdqa xmm6,xmm15
- paddd xmm5,XMMWORD[80+r11]
- paddd xmm6,XMMWORD[96+r11]
-
-
-
- movdqa xmm8,XMMWORD[rsp]
- lea rax,[16+rsp]
- movdqa xmm7,XMMWORD[((-16))+r11]
- pxor xmm15,xmm8
- pxor xmm0,xmm8
- pxor xmm1,xmm8
- pxor xmm2,xmm8
-DB 102,68,15,56,0,255
-DB 102,15,56,0,199
- pxor xmm3,xmm8
- pxor xmm4,xmm8
-DB 102,15,56,0,207
-DB 102,15,56,0,215
- pxor xmm5,xmm8
- pxor xmm6,xmm8
-DB 102,15,56,0,223
-DB 102,15,56,0,231
-DB 102,15,56,0,239
-DB 102,15,56,0,247
- lea r11,[$L$BS0]
- mov r10d,ebx
-
- call _bsaes_encrypt8_bitslice
-
- sub r14,8
- jc NEAR $L$ctr_enc_loop_done
-
- movdqu xmm7,XMMWORD[r12]
- movdqu xmm8,XMMWORD[16+r12]
- movdqu xmm9,XMMWORD[32+r12]
- movdqu xmm10,XMMWORD[48+r12]
- movdqu xmm11,XMMWORD[64+r12]
- movdqu xmm12,XMMWORD[80+r12]
- movdqu xmm13,XMMWORD[96+r12]
- movdqu xmm14,XMMWORD[112+r12]
- lea r12,[128+r12]
- pxor xmm7,xmm15
- movdqa xmm15,XMMWORD[32+rbp]
- pxor xmm0,xmm8
- movdqu XMMWORD[r13],xmm7
- pxor xmm3,xmm9
- movdqu XMMWORD[16+r13],xmm0
- pxor xmm5,xmm10
- movdqu XMMWORD[32+r13],xmm3
- pxor xmm2,xmm11
- movdqu XMMWORD[48+r13],xmm5
- pxor xmm6,xmm12
- movdqu XMMWORD[64+r13],xmm2
- pxor xmm1,xmm13
- movdqu XMMWORD[80+r13],xmm6
- pxor xmm4,xmm14
- movdqu XMMWORD[96+r13],xmm1
- lea r11,[$L$ADD1]
- movdqu XMMWORD[112+r13],xmm4
- lea r13,[128+r13]
- paddd xmm15,XMMWORD[112+r11]
- jnz NEAR $L$ctr_enc_loop
-
- jmp NEAR $L$ctr_enc_done
-ALIGN 16
-$L$ctr_enc_loop_done:
- add r14,8
- movdqu xmm7,XMMWORD[r12]
- pxor xmm15,xmm7
- movdqu XMMWORD[r13],xmm15
- cmp r14,2
- jb NEAR $L$ctr_enc_done
- movdqu xmm8,XMMWORD[16+r12]
- pxor xmm0,xmm8
- movdqu XMMWORD[16+r13],xmm0
- je NEAR $L$ctr_enc_done
- movdqu xmm9,XMMWORD[32+r12]
- pxor xmm3,xmm9
- movdqu XMMWORD[32+r13],xmm3
- cmp r14,4
- jb NEAR $L$ctr_enc_done
- movdqu xmm10,XMMWORD[48+r12]
- pxor xmm5,xmm10
- movdqu XMMWORD[48+r13],xmm5
- je NEAR $L$ctr_enc_done
- movdqu xmm11,XMMWORD[64+r12]
- pxor xmm2,xmm11
- movdqu XMMWORD[64+r13],xmm2
- cmp r14,6
- jb NEAR $L$ctr_enc_done
- movdqu xmm12,XMMWORD[80+r12]
- pxor xmm6,xmm12
- movdqu XMMWORD[80+r13],xmm6
- je NEAR $L$ctr_enc_done
- movdqu xmm13,XMMWORD[96+r12]
- pxor xmm1,xmm13
- movdqu XMMWORD[96+r13],xmm1
-
-
-
-$L$ctr_enc_done:
- lea rax,[rsp]
- pxor xmm0,xmm0
-$L$ctr_enc_bzero:
- movdqa XMMWORD[rax],xmm0
- movdqa XMMWORD[16+rax],xmm0
- lea rax,[32+rax]
- cmp rbp,rax
- ja NEAR $L$ctr_enc_bzero
-
- lea rax,[120+rbp]
-
- movaps xmm6,XMMWORD[64+rbp]
- movaps xmm7,XMMWORD[80+rbp]
- movaps xmm8,XMMWORD[96+rbp]
- movaps xmm9,XMMWORD[112+rbp]
- movaps xmm10,XMMWORD[128+rbp]
- movaps xmm11,XMMWORD[144+rbp]
- movaps xmm12,XMMWORD[160+rbp]
- movaps xmm13,XMMWORD[176+rbp]
- movaps xmm14,XMMWORD[192+rbp]
- movaps xmm15,XMMWORD[208+rbp]
- lea rax,[160+rax]
-$L$ctr_enc_tail:
- mov r15,QWORD[((-48))+rax]
-
- mov r14,QWORD[((-40))+rax]
-
- mov r13,QWORD[((-32))+rax]
-
- mov r12,QWORD[((-24))+rax]
-
- mov rbx,QWORD[((-16))+rax]
-
- mov rbp,QWORD[((-8))+rax]
-
- lea rsp,[rax]
-
-$L$ctr_enc_epilogue:
- DB 0F3h,0C3h ;repret
-
-
-
-ALIGN 64
-_bsaes_const:
-$L$M0ISR:
- DQ 0x0a0e0206070b0f03,0x0004080c0d010509
-$L$ISRM0:
- DQ 0x01040b0e0205080f,0x0306090c00070a0d
-$L$ISR:
- DQ 0x0504070602010003,0x0f0e0d0c080b0a09
-$L$BS0:
- DQ 0x5555555555555555,0x5555555555555555
-$L$BS1:
- DQ 0x3333333333333333,0x3333333333333333
-$L$BS2:
- DQ 0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f
-$L$SR:
- DQ 0x0504070600030201,0x0f0e0d0c0a09080b
-$L$SRM0:
- DQ 0x0304090e00050a0f,0x01060b0c0207080d
-$L$M0SR:
- DQ 0x0a0e02060f03070b,0x0004080c05090d01
-$L$SWPUP:
- DQ 0x0706050403020100,0x0c0d0e0f0b0a0908
-$L$SWPUPM0SR:
- DQ 0x0a0d02060c03070b,0x0004080f05090e01
-$L$ADD1:
- DQ 0x0000000000000000,0x0000000100000000
-$L$ADD2:
- DQ 0x0000000000000000,0x0000000200000000
-$L$ADD3:
- DQ 0x0000000000000000,0x0000000300000000
-$L$ADD4:
- DQ 0x0000000000000000,0x0000000400000000
-$L$ADD5:
- DQ 0x0000000000000000,0x0000000500000000
-$L$ADD6:
- DQ 0x0000000000000000,0x0000000600000000
-$L$ADD7:
- DQ 0x0000000000000000,0x0000000700000000
-$L$ADD8:
- DQ 0x0000000000000000,0x0000000800000000
-$L$xts_magic:
- DD 0x87,0,1,0
-$L$masks:
- DQ 0x0101010101010101,0x0101010101010101
- DQ 0x0202020202020202,0x0202020202020202
- DQ 0x0404040404040404,0x0404040404040404
- DQ 0x0808080808080808,0x0808080808080808
-$L$M0:
- DQ 0x02060a0e03070b0f,0x0004080c0105090d
-$L$63:
- DQ 0x6363636363636363,0x6363636363636363
-DB 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102
-DB 111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44
-DB 32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44
-DB 32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32
-DB 65,110,100,121,32,80,111,108,121,97,107,111,118,0
-ALIGN 64
-
-EXTERN __imp_RtlVirtualUnwind
-
-ALIGN 16
-se_handler:
- push rsi
- push rdi
- push rbx
- push rbp
- push r12
- push r13
- push r14
- push r15
- pushfq
- sub rsp,64
-
- mov rax,QWORD[120+r8]
- mov rbx,QWORD[248+r8]
-
- mov rsi,QWORD[8+r9]
- mov r11,QWORD[56+r9]
-
- mov r10d,DWORD[r11]
- lea r10,[r10*1+rsi]
- cmp rbx,r10
- jbe NEAR $L$in_prologue
-
- mov r10d,DWORD[4+r11]
- lea r10,[r10*1+rsi]
- cmp rbx,r10
- jae NEAR $L$in_prologue
-
- mov r10d,DWORD[8+r11]
- lea r10,[r10*1+rsi]
- cmp rbx,r10
- jae NEAR $L$in_tail
-
- mov rax,QWORD[160+r8]
-
- lea rsi,[64+rax]
- lea rdi,[512+r8]
- mov ecx,20
- DD 0xa548f3fc
- lea rax,[((160+120))+rax]
-
-$L$in_tail:
- mov rbp,QWORD[((-48))+rax]
- mov rbx,QWORD[((-40))+rax]
- mov r12,QWORD[((-32))+rax]
- mov r13,QWORD[((-24))+rax]
- mov r14,QWORD[((-16))+rax]
- mov r15,QWORD[((-8))+rax]
- mov QWORD[144+r8],rbx
- mov QWORD[160+r8],rbp
- mov QWORD[216+r8],r12
- mov QWORD[224+r8],r13
- mov QWORD[232+r8],r14
- mov QWORD[240+r8],r15
-
-$L$in_prologue:
- mov QWORD[152+r8],rax
-
- mov rdi,QWORD[40+r9]
- mov rsi,r8
- mov ecx,154
- DD 0xa548f3fc
-
- mov rsi,r9
- xor rcx,rcx
- mov rdx,QWORD[8+rsi]
- mov r8,QWORD[rsi]
- mov r9,QWORD[16+rsi]
- mov r10,QWORD[40+rsi]
- lea r11,[56+rsi]
- lea r12,[24+rsi]
- mov QWORD[32+rsp],r10
- mov QWORD[40+rsp],r11
- mov QWORD[48+rsp],r12
- mov QWORD[56+rsp],rcx
- call QWORD[__imp_RtlVirtualUnwind]
-
- mov eax,1
- add rsp,64
- popfq
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbp
- pop rbx
- pop rdi
- pop rsi
- DB 0F3h,0C3h ;repret
-
-
-section .pdata rdata align=4
-ALIGN 4
- DD $L$cbc_dec_prologue wrt ..imagebase
- DD $L$cbc_dec_epilogue wrt ..imagebase
- DD $L$cbc_dec_info wrt ..imagebase
-
- DD $L$ctr_enc_prologue wrt ..imagebase
- DD $L$ctr_enc_epilogue wrt ..imagebase
- DD $L$ctr_enc_info wrt ..imagebase
-
-section .xdata rdata align=8
-ALIGN 8
-$L$cbc_dec_info:
-DB 9,0,0,0
- DD se_handler wrt ..imagebase
- DD $L$cbc_dec_body wrt ..imagebase,$L$cbc_dec_epilogue wrt ..imagebase
- DD $L$cbc_dec_tail wrt ..imagebase
- DD 0
-$L$ctr_enc_info:
-DB 9,0,0,0
- DD se_handler wrt ..imagebase
- DD $L$ctr_enc_body wrt ..imagebase,$L$ctr_enc_epilogue wrt ..imagebase
- DD $L$ctr_enc_tail wrt ..imagebase
- DD 0
diff --git a/win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm b/win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
index 593d166c..10092c8a 100644
--- a/win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
+++ b/win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
@@ -120,6 +120,181 @@ DB 102,15,56,0,193
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_encrypt_core_2x:
+
+ mov r9,rdx
+ mov r11,16
+ mov eax,DWORD[240+rdx]
+ movdqa xmm1,xmm9
+ movdqa xmm7,xmm9
+ movdqa xmm2,XMMWORD[$L$k_ipt]
+ movdqa xmm8,xmm2
+ pandn xmm1,xmm0
+ pandn xmm7,xmm6
+ movdqu xmm5,XMMWORD[r9]
+
+ psrld xmm1,4
+ psrld xmm7,4
+ pand xmm0,xmm9
+ pand xmm6,xmm9
+DB 102,15,56,0,208
+DB 102,68,15,56,0,198
+ movdqa xmm0,XMMWORD[(($L$k_ipt+16))]
+ movdqa xmm6,xmm0
+DB 102,15,56,0,193
+DB 102,15,56,0,247
+ pxor xmm2,xmm5
+ pxor xmm8,xmm5
+ add r9,16
+ pxor xmm0,xmm2
+ pxor xmm6,xmm8
+ lea r10,[$L$k_mc_backward]
+ jmp NEAR $L$enc2x_entry
+
+ALIGN 16
+$L$enc2x_loop:
+
+ movdqa xmm4,XMMWORD[$L$k_sb1]
+ movdqa xmm0,XMMWORD[(($L$k_sb1+16))]
+ movdqa xmm12,xmm4
+ movdqa xmm6,xmm0
+DB 102,15,56,0,226
+DB 102,69,15,56,0,224
+DB 102,15,56,0,195
+DB 102,65,15,56,0,243
+ pxor xmm4,xmm5
+ pxor xmm12,xmm5
+ movdqa xmm5,XMMWORD[$L$k_sb2]
+ movdqa xmm13,xmm5
+ pxor xmm0,xmm4
+ pxor xmm6,xmm12
+ movdqa xmm1,XMMWORD[((-64))+r10*1+r11]
+
+DB 102,15,56,0,234
+DB 102,69,15,56,0,232
+ movdqa xmm4,XMMWORD[r10*1+r11]
+
+ movdqa xmm2,XMMWORD[(($L$k_sb2+16))]
+ movdqa xmm8,xmm2
+DB 102,15,56,0,211
+DB 102,69,15,56,0,195
+ movdqa xmm3,xmm0
+ movdqa xmm11,xmm6
+ pxor xmm2,xmm5
+ pxor xmm8,xmm13
+DB 102,15,56,0,193
+DB 102,15,56,0,241
+ add r9,16
+ pxor xmm0,xmm2
+ pxor xmm6,xmm8
+DB 102,15,56,0,220
+DB 102,68,15,56,0,220
+ add r11,16
+ pxor xmm3,xmm0
+ pxor xmm11,xmm6
+DB 102,15,56,0,193
+DB 102,15,56,0,241
+ and r11,0x30
+ sub rax,1
+ pxor xmm0,xmm3
+ pxor xmm6,xmm11
+
+$L$enc2x_entry:
+
+ movdqa xmm1,xmm9
+ movdqa xmm7,xmm9
+ movdqa xmm5,XMMWORD[(($L$k_inv+16))]
+ movdqa xmm13,xmm5
+ pandn xmm1,xmm0
+ pandn xmm7,xmm6
+ psrld xmm1,4
+ psrld xmm7,4
+ pand xmm0,xmm9
+ pand xmm6,xmm9
+DB 102,15,56,0,232
+DB 102,68,15,56,0,238
+ movdqa xmm3,xmm10
+ movdqa xmm11,xmm10
+ pxor xmm0,xmm1
+ pxor xmm6,xmm7
+DB 102,15,56,0,217
+DB 102,68,15,56,0,223
+ movdqa xmm4,xmm10
+ movdqa xmm12,xmm10
+ pxor xmm3,xmm5
+ pxor xmm11,xmm13
+DB 102,15,56,0,224
+DB 102,68,15,56,0,230
+ movdqa xmm2,xmm10
+ movdqa xmm8,xmm10
+ pxor xmm4,xmm5
+ pxor xmm12,xmm13
+DB 102,15,56,0,211
+DB 102,69,15,56,0,195
+ movdqa xmm3,xmm10
+ movdqa xmm11,xmm10
+ pxor xmm2,xmm0
+ pxor xmm8,xmm6
+DB 102,15,56,0,220
+DB 102,69,15,56,0,220
+ movdqu xmm5,XMMWORD[r9]
+
+ pxor xmm3,xmm1
+ pxor xmm11,xmm7
+ jnz NEAR $L$enc2x_loop
+
+
+ movdqa xmm4,XMMWORD[((-96))+r10]
+ movdqa xmm0,XMMWORD[((-80))+r10]
+ movdqa xmm12,xmm4
+ movdqa xmm6,xmm0
+DB 102,15,56,0,226
+DB 102,69,15,56,0,224
+ pxor xmm4,xmm5
+ pxor xmm12,xmm5
+DB 102,15,56,0,195
+DB 102,65,15,56,0,243
+ movdqa xmm1,XMMWORD[64+r10*1+r11]
+
+ pxor xmm0,xmm4
+ pxor xmm6,xmm12
+DB 102,15,56,0,193
+DB 102,15,56,0,241
+ DB 0F3h,0C3h ;repret
+
+
+
+
+
+
+
+
+
ALIGN 16
_vpaes_decrypt_core:
@@ -929,6 +1104,105 @@ $L$cbc_abort:
DB 0F3h,0C3h ;repret
$L$SEH_end_vpaes_cbc_encrypt:
+global vpaes_ctr32_encrypt_blocks
+
+ALIGN 16
+vpaes_ctr32_encrypt_blocks:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_vpaes_ctr32_encrypt_blocks:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+
+
+
+
+ xchg rdx,rcx
+ test rcx,rcx
+ jz NEAR $L$ctr32_abort
+ lea rsp,[((-184))+rsp]
+ movaps XMMWORD[16+rsp],xmm6
+ movaps XMMWORD[32+rsp],xmm7
+ movaps XMMWORD[48+rsp],xmm8
+ movaps XMMWORD[64+rsp],xmm9
+ movaps XMMWORD[80+rsp],xmm10
+ movaps XMMWORD[96+rsp],xmm11
+ movaps XMMWORD[112+rsp],xmm12
+ movaps XMMWORD[128+rsp],xmm13
+ movaps XMMWORD[144+rsp],xmm14
+ movaps XMMWORD[160+rsp],xmm15
+$L$ctr32_body:
+ movdqu xmm0,XMMWORD[r8]
+ movdqa xmm8,XMMWORD[$L$ctr_add_one]
+ sub rsi,rdi
+ call _vpaes_preheat
+ movdqa xmm6,xmm0
+ pshufb xmm6,XMMWORD[$L$rev_ctr]
+
+ test rcx,1
+ jz NEAR $L$ctr32_prep_loop
+
+
+
+ movdqu xmm7,XMMWORD[rdi]
+ call _vpaes_encrypt_core
+ pxor xmm0,xmm7
+ paddd xmm6,xmm8
+ movdqu XMMWORD[rdi*1+rsi],xmm0
+ sub rcx,1
+ lea rdi,[16+rdi]
+ jz NEAR $L$ctr32_done
+
+$L$ctr32_prep_loop:
+
+
+ movdqa xmm14,xmm6
+ movdqa xmm15,xmm6
+ paddd xmm15,xmm8
+
+$L$ctr32_loop:
+ movdqa xmm1,XMMWORD[$L$rev_ctr]
+ movdqa xmm0,xmm14
+ movdqa xmm6,xmm15
+DB 102,15,56,0,193
+DB 102,15,56,0,241
+ call _vpaes_encrypt_core_2x
+ movdqu xmm1,XMMWORD[rdi]
+ movdqu xmm2,XMMWORD[16+rdi]
+ movdqa xmm3,XMMWORD[$L$ctr_add_two]
+ pxor xmm0,xmm1
+ pxor xmm6,xmm2
+ paddd xmm14,xmm3
+ paddd xmm15,xmm3
+ movdqu XMMWORD[rdi*1+rsi],xmm0
+ movdqu XMMWORD[16+rdi*1+rsi],xmm6
+ sub rcx,2
+ lea rdi,[32+rdi]
+ jnz NEAR $L$ctr32_loop
+
+$L$ctr32_done:
+ movaps xmm6,XMMWORD[16+rsp]
+ movaps xmm7,XMMWORD[32+rsp]
+ movaps xmm8,XMMWORD[48+rsp]
+ movaps xmm9,XMMWORD[64+rsp]
+ movaps xmm10,XMMWORD[80+rsp]
+ movaps xmm11,XMMWORD[96+rsp]
+ movaps xmm12,XMMWORD[112+rsp]
+ movaps xmm13,XMMWORD[128+rsp]
+ movaps xmm14,XMMWORD[144+rsp]
+ movaps xmm15,XMMWORD[160+rsp]
+ lea rsp,[184+rsp]
+$L$ctr32_epilogue:
+$L$ctr32_abort:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_vpaes_ctr32_encrypt_blocks:
@@ -1051,6 +1325,17 @@ $L$k_dsbe:
$L$k_dsbo:
DQ 0x1387EA537EF94000,0xC7AA6DB9D4943E2D
DQ 0x12D7560F93441D00,0xCA4B8159D8C58E9C
+
+
+$L$rev_ctr:
+ DQ 0x0706050403020100,0x0c0d0e0f0b0a0908
+
+
+$L$ctr_add_one:
+ DQ 0x0000000000000000,0x0000000100000000
+$L$ctr_add_two:
+ DQ 0x0000000000000000,0x0000000200000000
+
DB 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54
DB 52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97
@@ -1159,6 +1444,10 @@ ALIGN 4
DD $L$SEH_end_vpaes_cbc_encrypt wrt ..imagebase
DD $L$SEH_info_vpaes_cbc_encrypt wrt ..imagebase
+ DD $L$SEH_begin_vpaes_ctr32_encrypt_blocks wrt ..imagebase
+ DD $L$SEH_end_vpaes_ctr32_encrypt_blocks wrt ..imagebase
+ DD $L$SEH_info_vpaes_ctr32_encrypt_blocks wrt ..imagebase
+
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_vpaes_set_encrypt_key:
@@ -1181,3 +1470,7 @@ $L$SEH_info_vpaes_cbc_encrypt:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$cbc_body wrt ..imagebase,$L$cbc_epilogue wrt ..imagebase
+$L$SEH_info_vpaes_ctr32_encrypt_blocks:
+DB 9,0,0,0
+ DD se_handler wrt ..imagebase
+ DD $L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase