summary refs log tree commit diff
path: root/linux-arm/crypto/fipsmodule
diff options
context:
space:
mode:
Diffstat (limited to 'linux-arm/crypto/fipsmodule')
-rw-r--r--  linux-arm/crypto/fipsmodule/armv4-mont-linux.S       |   2
-rw-r--r--  linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S      |   1
-rw-r--r--  linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S |  57
-rw-r--r--  linux-arm/crypto/fipsmodule/sha256-armv4-linux.S     | 170
-rw-r--r--  linux-arm/crypto/fipsmodule/sha512-armv4-linux.S     |  50
5 files changed, 112 insertions, 168 deletions
diff --git a/linux-arm/crypto/fipsmodule/armv4-mont-linux.S b/linux-arm/crypto/fipsmodule/armv4-mont-linux.S
index 0ec68610..8073aa62 100644
--- a/linux-arm/crypto/fipsmodule/armv4-mont-linux.S
+++ b/linux-arm/crypto/fipsmodule/armv4-mont-linux.S
@@ -193,7 +193,7 @@ bn_mul_mont:
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
.Labrt:
-#if __ARM_ARCH__>=5
+#if __ARM_ARCH>=5
bx lr @ bx lr
#else
tst lr,#1
diff --git a/linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S b/linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S
index 49eda8d6..01a9ead2 100644
--- a/linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S
+++ b/linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S
@@ -67,7 +67,6 @@
# define VFP_ABI_FRAME 0
# define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif
diff --git a/linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S b/linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S
index 660ccbaa..323e6e6e 100644
--- a/linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S
+++ b/linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S
@@ -14,25 +14,12 @@
.code 32
#endif
-.globl sha1_block_data_order
-.hidden sha1_block_data_order
-.type sha1_block_data_order,%function
+.globl sha1_block_data_order_nohw
+.hidden sha1_block_data_order_nohw
+.type sha1_block_data_order_nohw,%function
.align 5
-sha1_block_data_order:
-#if __ARM_MAX_ARCH__>=7
-.Lsha1_block:
- adr r3,.Lsha1_block
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
-#ifdef __APPLE__
- ldr r12,[r12]
-#endif
- tst r12,#ARMV8_SHA1
- bne .LARMv8
- tst r12,#ARMV7_NEON
- bne .LNEON
-#endif
+sha1_block_data_order_nohw:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
ldmia r0,{r3,r4,r5,r6,r7}
@@ -44,7 +31,7 @@ sha1_block_data_order:
mov r6,r6,ror#30
mov r7,r7,ror#30 @ [6]
.L_00_15:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@@ -69,7 +56,7 @@ sha1_block_data_order:
eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r7,r7,r10 @ E+=F_00_19(B,C,D)
-#if __ARM_ARCH__<7
+#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@@ -94,7 +81,7 @@ sha1_block_data_order:
eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r6,r6,r10 @ E+=F_00_19(B,C,D)
-#if __ARM_ARCH__<7
+#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@@ -119,7 +106,7 @@ sha1_block_data_order:
eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r5,r5,r10 @ E+=F_00_19(B,C,D)
-#if __ARM_ARCH__<7
+#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@@ -144,7 +131,7 @@ sha1_block_data_order:
eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]!
add r4,r4,r10 @ E+=F_00_19(B,C,D)
-#if __ARM_ARCH__<7
+#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@@ -177,7 +164,7 @@ sha1_block_data_order:
#endif
bne .L_00_15 @ [((11+4)*5+2)*3]
sub sp,sp,#25*4
-#if __ARM_ARCH__<7
+#if __ARM_ARCH<7
ldrb r10,[r1,#2]
ldrb r9,[r1,#3]
ldrb r11,[r1,#1]
@@ -475,7 +462,7 @@ sha1_block_data_order:
teq r1,r2
bne .Lloop @ [+18], total 1307
-#if __ARM_ARCH__>=5
+#if __ARM_ARCH>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
@@ -483,17 +470,13 @@ sha1_block_data_order:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
-.size sha1_block_data_order,.-sha1_block_data_order
+.size sha1_block_data_order_nohw,.-sha1_block_data_order_nohw
.align 5
.LK_00_19:.word 0x5a827999
.LK_20_39:.word 0x6ed9eba1
.LK_40_59:.word 0x8f1bbcdc
.LK_60_79:.word 0xca62c1d6
-#if __ARM_MAX_ARCH__>=7
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-.Lsha1_block
-#endif
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 5
@@ -501,10 +484,11 @@ sha1_block_data_order:
.arch armv7-a
.fpu neon
+.globl sha1_block_data_order_neon
+.hidden sha1_block_data_order_neon
.type sha1_block_data_order_neon,%function
.align 4
sha1_block_data_order_neon:
-.LNEON:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
@ dmb @ errata #451034 on early Cortex A8
@@ -1360,10 +1344,11 @@ sha1_block_data_order_neon:
# define INST(a,b,c,d) .byte a,b,c,d|0x10
# endif
-.type sha1_block_data_order_armv8,%function
+.globl sha1_block_data_order_hw
+.hidden sha1_block_data_order_hw
+.type sha1_block_data_order_hw,%function
.align 5
-sha1_block_data_order_armv8:
-.LARMv8:
+sha1_block_data_order_hw:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
veor q1,q1,q1
@@ -1491,10 +1476,6 @@ sha1_block_data_order_armv8:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
bx lr @ bx lr
-.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
-#endif
-#if __ARM_MAX_ARCH__>=7
-.comm OPENSSL_armcap_P,4,4
-.hidden OPENSSL_armcap_P
+.size sha1_block_data_order_hw,.-sha1_block_data_order_hw
#endif
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
diff --git a/linux-arm/crypto/fipsmodule/sha256-armv4-linux.S b/linux-arm/crypto/fipsmodule/sha256-armv4-linux.S
index 2450322e..82cc57ab 100644
--- a/linux-arm/crypto/fipsmodule/sha256-armv4-linux.S
+++ b/linux-arm/crypto/fipsmodule/sha256-armv4-linux.S
@@ -52,7 +52,7 @@
#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_ARCH __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif
@@ -90,47 +90,26 @@ K256:
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256
.word 0 @ terminator
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-.Lsha256_block_data_order
-#endif
.align 5
-.globl sha256_block_data_order
-.hidden sha256_block_data_order
-.type sha256_block_data_order,%function
-sha256_block_data_order:
-.Lsha256_block_data_order:
-#if __ARM_ARCH__<7 && !defined(__thumb2__)
- sub r3,pc,#8 @ sha256_block_data_order
-#else
- adr r3,.Lsha256_block_data_order
-#endif
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
-#ifdef __APPLE__
- ldr r12,[r12]
-#endif
- tst r12,#ARMV8_SHA256
- bne .LARMv8
- tst r12,#ARMV7_NEON
- bne .LNEON
-#endif
+.globl sha256_block_data_order_nohw
+.hidden sha256_block_data_order_nohw
+.type sha256_block_data_order_nohw,%function
+sha256_block_data_order_nohw:
add r2,r1,r2,lsl#6 @ len to point at the end of inp
stmdb sp!,{r0,r1,r2,r4-r11,lr}
ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
- sub r14,r3,#256+32 @ K256
+ adr r14,K256
sub sp,sp,#16*4 @ alloca(X[16])
.Loop:
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4
# else
ldrb r2,[r1,#3]
# endif
eor r3,r5,r6 @ magic
eor r12,r12,r12
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 0
# if 0==15
str r1,[sp,#17*4] @ make room for r1
@@ -171,7 +150,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 0<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -188,7 +167,7 @@ sha256_block_data_order:
eor r3,r3,r5 @ Maj(a,b,c)
add r11,r11,r0,ror#2 @ h+=Sigma0(a)
@ add r11,r11,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 1
# if 1==15
str r1,[sp,#17*4] @ make room for r1
@@ -229,7 +208,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 1<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -246,7 +225,7 @@ sha256_block_data_order:
eor r12,r12,r4 @ Maj(a,b,c)
add r10,r10,r0,ror#2 @ h+=Sigma0(a)
@ add r10,r10,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 2
# if 2==15
str r1,[sp,#17*4] @ make room for r1
@@ -287,7 +266,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 2<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -304,7 +283,7 @@ sha256_block_data_order:
eor r3,r3,r11 @ Maj(a,b,c)
add r9,r9,r0,ror#2 @ h+=Sigma0(a)
@ add r9,r9,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 3
# if 3==15
str r1,[sp,#17*4] @ make room for r1
@@ -345,7 +324,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 3<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -362,7 +341,7 @@ sha256_block_data_order:
eor r12,r12,r10 @ Maj(a,b,c)
add r8,r8,r0,ror#2 @ h+=Sigma0(a)
@ add r8,r8,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 4
# if 4==15
str r1,[sp,#17*4] @ make room for r1
@@ -403,7 +382,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 4<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -420,7 +399,7 @@ sha256_block_data_order:
eor r3,r3,r9 @ Maj(a,b,c)
add r7,r7,r0,ror#2 @ h+=Sigma0(a)
@ add r7,r7,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 5
# if 5==15
str r1,[sp,#17*4] @ make room for r1
@@ -461,7 +440,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 5<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -478,7 +457,7 @@ sha256_block_data_order:
eor r12,r12,r8 @ Maj(a,b,c)
add r6,r6,r0,ror#2 @ h+=Sigma0(a)
@ add r6,r6,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 6
# if 6==15
str r1,[sp,#17*4] @ make room for r1
@@ -519,7 +498,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 6<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -536,7 +515,7 @@ sha256_block_data_order:
eor r3,r3,r7 @ Maj(a,b,c)
add r5,r5,r0,ror#2 @ h+=Sigma0(a)
@ add r5,r5,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 7
# if 7==15
str r1,[sp,#17*4] @ make room for r1
@@ -577,7 +556,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 7<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -594,7 +573,7 @@ sha256_block_data_order:
eor r12,r12,r6 @ Maj(a,b,c)
add r4,r4,r0,ror#2 @ h+=Sigma0(a)
@ add r4,r4,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 8
# if 8==15
str r1,[sp,#17*4] @ make room for r1
@@ -635,7 +614,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 8<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -652,7 +631,7 @@ sha256_block_data_order:
eor r3,r3,r5 @ Maj(a,b,c)
add r11,r11,r0,ror#2 @ h+=Sigma0(a)
@ add r11,r11,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 9
# if 9==15
str r1,[sp,#17*4] @ make room for r1
@@ -693,7 +672,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 9<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -710,7 +689,7 @@ sha256_block_data_order:
eor r12,r12,r4 @ Maj(a,b,c)
add r10,r10,r0,ror#2 @ h+=Sigma0(a)
@ add r10,r10,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 10
# if 10==15
str r1,[sp,#17*4] @ make room for r1
@@ -751,7 +730,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 10<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -768,7 +747,7 @@ sha256_block_data_order:
eor r3,r3,r11 @ Maj(a,b,c)
add r9,r9,r0,ror#2 @ h+=Sigma0(a)
@ add r9,r9,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 11
# if 11==15
str r1,[sp,#17*4] @ make room for r1
@@ -809,7 +788,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 11<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -826,7 +805,7 @@ sha256_block_data_order:
eor r12,r12,r10 @ Maj(a,b,c)
add r8,r8,r0,ror#2 @ h+=Sigma0(a)
@ add r8,r8,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 12
# if 12==15
str r1,[sp,#17*4] @ make room for r1
@@ -867,7 +846,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 12<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -884,7 +863,7 @@ sha256_block_data_order:
eor r3,r3,r9 @ Maj(a,b,c)
add r7,r7,r0,ror#2 @ h+=Sigma0(a)
@ add r7,r7,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 13
# if 13==15
str r1,[sp,#17*4] @ make room for r1
@@ -925,7 +904,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 13<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -942,7 +921,7 @@ sha256_block_data_order:
eor r12,r12,r8 @ Maj(a,b,c)
add r6,r6,r0,ror#2 @ h+=Sigma0(a)
@ add r6,r6,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 14
# if 14==15
str r1,[sp,#17*4] @ make room for r1
@@ -983,7 +962,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 14<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1000,7 +979,7 @@ sha256_block_data_order:
eor r3,r3,r7 @ Maj(a,b,c)
add r5,r5,r0,ror#2 @ h+=Sigma0(a)
@ add r5,r5,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
@ ldr r2,[r1],#4 @ 15
# if 15==15
str r1,[sp,#17*4] @ make room for r1
@@ -1041,7 +1020,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 15<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1091,7 +1070,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 16<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1140,7 +1119,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 17<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1189,7 +1168,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 18<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1238,7 +1217,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 19<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1287,7 +1266,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 20<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1336,7 +1315,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 21<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1385,7 +1364,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 22<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1434,7 +1413,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 23<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1483,7 +1462,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 24<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1532,7 +1511,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 25<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1581,7 +1560,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 26<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1630,7 +1609,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 27<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1679,7 +1658,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 28<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1728,7 +1707,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 29<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1777,7 +1756,7 @@ sha256_block_data_order:
cmp r12,#0xf2 @ done?
#endif
#if 30<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1826,7 +1805,7 @@ sha256_block_data_order:
cmp r3,#0xf2 @ done?
#endif
#if 31<15
-# if __ARM_ARCH__>=7
+# if __ARM_ARCH>=7
ldr r2,[r1],#4 @ prefetch
# else
ldrb r2,[r1,#3]
@@ -1843,7 +1822,7 @@ sha256_block_data_order:
eor r12,r12,r6 @ Maj(a,b,c)
add r4,r4,r0,ror#2 @ h+=Sigma0(a)
@ add r4,r4,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
ite eq @ Thumb2 thing, sanity check in ARM
#endif
ldreq r3,[sp,#16*4] @ pull ctx
@@ -1874,7 +1853,7 @@ sha256_block_data_order:
bne .Loop
add sp,sp,#19*4 @ destroy frame
-#if __ARM_ARCH__>=5
+#if __ARM_ARCH>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
@@ -1882,7 +1861,7 @@ sha256_block_data_order:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
-.size sha256_block_data_order,.-sha256_block_data_order
+.size sha256_block_data_order_nohw,.-sha256_block_data_order_nohw
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
@@ -1893,10 +1872,12 @@ sha256_block_data_order:
.align 5
.skip 16
sha256_block_data_order_neon:
-.LNEON:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
sub r11,sp,#16*4+16
+ @ In Arm mode, the following ADR runs up against the limits of encodable
+ @ offsets. It only fits because the offset, when the ADR is placed here,
+ @ is a multiple of 16.
adr r14,K256
bic r11,r11,#15 @ align for 128-bit stores
mov r12,sp
@@ -2679,12 +2660,27 @@ sha256_block_data_order_neon:
# define INST(a,b,c,d) .byte a,b,c,d
# endif
-.type sha256_block_data_order_armv8,%function
+.LK256_shortcut:
+@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
+#if defined(__thumb2__)
+.word K256-(.LK256_add+4)
+#else
+.word K256-(.LK256_add+8)
+#endif
+
+.globl sha256_block_data_order_hw
+.hidden sha256_block_data_order_hw
+.type sha256_block_data_order_hw,%function
.align 5
-sha256_block_data_order_armv8:
-.LARMv8:
+sha256_block_data_order_hw:
+ @ K256 is too far to reference from one ADR command in Thumb mode. In
+ @ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte
+ @ boundary. For simplicity, just load the offset from .LK256_shortcut.
+ ldr r3,.LK256_shortcut
+.LK256_add:
+ add r3,pc,r3
+
vld1.32 {q0,q1},[r0]
- sub r3,r3,#256+32
add r2,r1,r2,lsl#6 @ len to point at the end of inp
b .Loop_v8
@@ -2816,13 +2812,9 @@ sha256_block_data_order_armv8:
vst1.32 {q0,q1},[r0]
bx lr @ bx lr
-.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+.size sha256_block_data_order_hw,.-sha256_block_data_order_hw
#endif
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.comm OPENSSL_armcap_P,4,4
-.hidden OPENSSL_armcap_P
-#endif
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
diff --git a/linux-arm/crypto/fipsmodule/sha512-armv4-linux.S b/linux-arm/crypto/fipsmodule/sha512-armv4-linux.S
index 9aed7cb6..55006865 100644
--- a/linux-arm/crypto/fipsmodule/sha512-armv4-linux.S
+++ b/linux-arm/crypto/fipsmodule/sha512-armv4-linux.S
@@ -63,7 +63,6 @@
# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
# define VFP_ABI_POP vldmia sp!,{d8-d15}
#else
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
# define VFP_ABI_PUSH
# define VFP_ABI_POP
@@ -136,36 +135,14 @@ K512:
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size K512,.-K512
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-.Lsha512_block_data_order
-.skip 32-4
-#else
-.skip 32
-#endif
-.globl sha512_block_data_order
-.hidden sha512_block_data_order
-.type sha512_block_data_order,%function
-sha512_block_data_order:
-.Lsha512_block_data_order:
-#if __ARM_ARCH__<7 && !defined(__thumb2__)
- sub r3,pc,#8 @ sha512_block_data_order
-#else
- adr r3,.Lsha512_block_data_order
-#endif
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
-#ifdef __APPLE__
- ldr r12,[r12]
-#endif
- tst r12,#ARMV7_NEON
- bne .LNEON
-#endif
+.globl sha512_block_data_order_nohw
+.hidden sha512_block_data_order_nohw
+.type sha512_block_data_order_nohw,%function
+sha512_block_data_order_nohw:
add r2,r1,r2,lsl#7 @ len to point at the end of inp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
- sub r14,r3,#672 @ K512
+ adr r14,K512
sub sp,sp,#9*8
ldr r7,[r0,#32+LO]
@@ -199,7 +176,7 @@ sha512_block_data_order:
str r4,[sp,#40+4]
.L00_15:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH<7
ldrb r3,[r1,#7]
ldrb r9, [r1,#6]
ldrb r10, [r1,#5]
@@ -276,7 +253,7 @@ sha512_block_data_order:
teq r9,#148
ldr r12,[sp,#16+0] @ c.lo
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
it eq @ Thumb2 thing, sanity check in ARM
#endif
orreq r14,r14,#1
@@ -416,7 +393,7 @@ sha512_block_data_order:
teq r9,#23
ldr r12,[sp,#16+0] @ c.lo
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
it eq @ Thumb2 thing, sanity check in ARM
#endif
orreq r14,r14,#1
@@ -453,7 +430,7 @@ sha512_block_data_order:
adc r6,r6,r4 @ h += T
tst r14,#1
add r14,r14,#8
-#if __ARM_ARCH__>=7
+#if __ARM_ARCH>=7
ittt eq @ Thumb2 thing, sanity check in ARM
#endif
ldreq r9,[sp,#184+0]
@@ -532,7 +509,7 @@ sha512_block_data_order:
bne .Loop
add sp,sp,#8*9 @ destroy frame
-#if __ARM_ARCH__>=5
+#if __ARM_ARCH>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
@@ -540,7 +517,7 @@ sha512_block_data_order:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
-.size sha512_block_data_order,.-sha512_block_data_order
+.size sha512_block_data_order_nohw,.-sha512_block_data_order_nohw
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
@@ -550,7 +527,6 @@ sha512_block_data_order:
.type sha512_block_data_order_neon,%function
.align 4
sha512_block_data_order_neon:
-.LNEON:
dmb @ errata #451034 on early Cortex A8
add r2,r1,r2,lsl#7 @ len to point at the end of inp
adr r3,K512
@@ -1876,8 +1852,4 @@ sha512_block_data_order_neon:
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.comm OPENSSL_armcap_P,4,4
-.hidden OPENSSL_armcap_P
-#endif
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)