diff options
Diffstat (limited to 'linux-arm/crypto/fipsmodule')
-rw-r--r-- | linux-arm/crypto/fipsmodule/armv4-mont-linux.S | 2 | ||||
-rw-r--r-- | linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S | 1 | ||||
-rw-r--r-- | linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S | 57 | ||||
-rw-r--r-- | linux-arm/crypto/fipsmodule/sha256-armv4-linux.S | 170 | ||||
-rw-r--r-- | linux-arm/crypto/fipsmodule/sha512-armv4-linux.S | 50 |
5 files changed, 112 insertions, 168 deletions
diff --git a/linux-arm/crypto/fipsmodule/armv4-mont-linux.S b/linux-arm/crypto/fipsmodule/armv4-mont-linux.S index 0ec68610..8073aa62 100644 --- a/linux-arm/crypto/fipsmodule/armv4-mont-linux.S +++ b/linux-arm/crypto/fipsmodule/armv4-mont-linux.S @@ -193,7 +193,7 @@ bn_mul_mont: add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 .Labrt: -#if __ARM_ARCH__>=5 +#if __ARM_ARCH>=5 bx lr @ bx lr #else tst lr,#1 diff --git a/linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S b/linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S index 49eda8d6..01a9ead2 100644 --- a/linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S +++ b/linux-arm/crypto/fipsmodule/bsaes-armv7-linux.S @@ -67,7 +67,6 @@ # define VFP_ABI_FRAME 0 # define BSAES_ASM_EXTENDED_KEY # define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ # define __ARM_MAX_ARCH__ 7 #endif diff --git a/linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S b/linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S index 660ccbaa..323e6e6e 100644 --- a/linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S +++ b/linux-arm/crypto/fipsmodule/sha1-armv4-large-linux.S @@ -14,25 +14,12 @@ .code 32 #endif -.globl sha1_block_data_order -.hidden sha1_block_data_order -.type sha1_block_data_order,%function +.globl sha1_block_data_order_nohw +.hidden sha1_block_data_order_nohw +.type sha1_block_data_order_nohw,%function .align 5 -sha1_block_data_order: -#if __ARM_MAX_ARCH__>=7 -.Lsha1_block: - adr r3,.Lsha1_block - ldr r12,.LOPENSSL_armcap - ldr r12,[r3,r12] @ OPENSSL_armcap_P -#ifdef __APPLE__ - ldr r12,[r12] -#endif - tst r12,#ARMV8_SHA1 - bne .LARMv8 - tst r12,#ARMV7_NEON - bne .LNEON -#endif +sha1_block_data_order_nohw: stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 ldmia r0,{r3,r4,r5,r6,r7} @@ -44,7 +31,7 @@ sha1_block_data_order: mov r6,r6,ror#30 mov r7,r7,ror#30 @ [6] .L_00_15: -#if __ARM_ARCH__<7 +#if __ARM_ARCH<7 ldrb r10,[r1,#2] ldrb r9,[r1,#3] ldrb r11,[r1,#1] @@ -69,7 +56,7 @@ sha1_block_data_order: eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) str r9,[r14,#-4]! add r7,r7,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 +#if __ARM_ARCH<7 ldrb r10,[r1,#2] ldrb r9,[r1,#3] ldrb r11,[r1,#1] @@ -94,7 +81,7 @@ sha1_block_data_order: eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) str r9,[r14,#-4]! add r6,r6,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 +#if __ARM_ARCH<7 ldrb r10,[r1,#2] ldrb r9,[r1,#3] ldrb r11,[r1,#1] @@ -119,7 +106,7 @@ sha1_block_data_order: eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) str r9,[r14,#-4]! add r5,r5,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 +#if __ARM_ARCH<7 ldrb r10,[r1,#2] ldrb r9,[r1,#3] ldrb r11,[r1,#1] @@ -144,7 +131,7 @@ sha1_block_data_order: eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) str r9,[r14,#-4]! add r4,r4,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 +#if __ARM_ARCH<7 ldrb r10,[r1,#2] ldrb r9,[r1,#3] ldrb r11,[r1,#1] @@ -177,7 +164,7 @@ sha1_block_data_order: #endif bne .L_00_15 @ [((11+4)*5+2)*3] sub sp,sp,#25*4 -#if __ARM_ARCH__<7 +#if __ARM_ARCH<7 ldrb r10,[r1,#2] ldrb r9,[r1,#3] ldrb r11,[r1,#1] @@ -475,7 +462,7 @@ sha1_block_data_order: teq r1,r2 bne .Lloop @ [+18], total 1307 -#if __ARM_ARCH__>=5 +#if __ARM_ARCH>=5 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} #else ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @@ -483,17 +470,13 @@ sha1_block_data_order: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -.size sha1_block_data_order,.-sha1_block_data_order +.size sha1_block_data_order_nohw,.-sha1_block_data_order_nohw .align 5 .LK_00_19:.word 0x5a827999 .LK_20_39:.word 0x6ed9eba1 .LK_40_59:.word 0x8f1bbcdc .LK_60_79:.word 0xca62c1d6 -#if __ARM_MAX_ARCH__>=7 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-.Lsha1_block -#endif .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 2 .align 5 @@ -501,10 +484,11 @@ sha1_block_data_order: .arch armv7-a .fpu neon +.globl sha1_block_data_order_neon +.hidden sha1_block_data_order_neon .type sha1_block_data_order_neon,%function .align 4 sha1_block_data_order_neon: -.LNEON: stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 @ dmb @ errata #451034 on early Cortex A8 @@ -1360,10 +1344,11 @@ sha1_block_data_order_neon: # define INST(a,b,c,d) .byte a,b,c,d|0x10 # endif -.type sha1_block_data_order_armv8,%function +.globl sha1_block_data_order_hw +.hidden sha1_block_data_order_hw +.type sha1_block_data_order_hw,%function .align 5 -sha1_block_data_order_armv8: -.LARMv8: +sha1_block_data_order_hw: vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so veor q1,q1,q1 @@ -1491,10 +1476,6 @@ sha1_block_data_order_armv8: vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} bx lr @ bx lr -.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8 -#endif -#if __ARM_MAX_ARCH__>=7 -.comm OPENSSL_armcap_P,4,4 -.hidden OPENSSL_armcap_P +.size sha1_block_data_order_hw,.-sha1_block_data_order_hw #endif #endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/linux-arm/crypto/fipsmodule/sha256-armv4-linux.S b/linux-arm/crypto/fipsmodule/sha256-armv4-linux.S index 2450322e..82cc57ab 100644 --- a/linux-arm/crypto/fipsmodule/sha256-armv4-linux.S +++ b/linux-arm/crypto/fipsmodule/sha256-armv4-linux.S @@ -52,7 +52,7 @@ #ifndef __KERNEL__ # include <openssl/arm_arch.h> #else -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_ARCH __LINUX_ARM_ARCH__ # define __ARM_MAX_ARCH__ 7 #endif @@ -90,47 +90,26 @@ K256: .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .size K256,.-K256 .word 0 @ terminator -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-.Lsha256_block_data_order -#endif .align 5 -.globl sha256_block_data_order -.hidden sha256_block_data_order -.type sha256_block_data_order,%function -sha256_block_data_order: -.Lsha256_block_data_order: -#if __ARM_ARCH__<7 && !defined(__thumb2__) - sub r3,pc,#8 @ sha256_block_data_order -#else - adr r3,.Lsha256_block_data_order -#endif -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) - ldr r12,.LOPENSSL_armcap - ldr r12,[r3,r12] @ OPENSSL_armcap_P -#ifdef __APPLE__ - ldr r12,[r12] -#endif - tst r12,#ARMV8_SHA256 - bne .LARMv8 - tst r12,#ARMV7_NEON - bne .LNEON -#endif +.globl sha256_block_data_order_nohw +.hidden sha256_block_data_order_nohw +.type sha256_block_data_order_nohw,%function +sha256_block_data_order_nohw: add r2,r1,r2,lsl#6 @ len to point at the end of inp stmdb sp!,{r0,r1,r2,r4-r11,lr} ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} - sub r14,r3,#256+32 @ K256 + adr r14,K256 sub sp,sp,#16*4 @ alloca(X[16]) .Loop: -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 # else ldrb r2,[r1,#3] # endif eor r3,r5,r6 @ magic eor r12,r12,r12 -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 0 # if 0==15 str r1,[sp,#17*4] @ make room for r1 @@ -171,7 +150,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 0<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -188,7 +167,7 @@ sha256_block_data_order: eor r3,r3,r5 @ Maj(a,b,c) add r11,r11,r0,ror#2 @ h+=Sigma0(a) @ add r11,r11,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 1 # if 1==15 str r1,[sp,#17*4] @ make room for r1 @@ -229,7 +208,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 1<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -246,7 +225,7 @@ sha256_block_data_order: eor r12,r12,r4 @ Maj(a,b,c) add r10,r10,r0,ror#2 @ h+=Sigma0(a) @ add r10,r10,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 2 # if 2==15 str r1,[sp,#17*4] @ make room for r1 @@ -287,7 +266,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 2<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -304,7 +283,7 @@ sha256_block_data_order: eor r3,r3,r11 @ Maj(a,b,c) add r9,r9,r0,ror#2 @ h+=Sigma0(a) @ add r9,r9,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 3 # if 3==15 str r1,[sp,#17*4] @ make room for r1 @@ -345,7 +324,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 3<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -362,7 +341,7 @@ sha256_block_data_order: eor r12,r12,r10 @ Maj(a,b,c) add r8,r8,r0,ror#2 @ h+=Sigma0(a) @ add r8,r8,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 4 # if 4==15 str r1,[sp,#17*4] @ make room for r1 @@ -403,7 +382,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 4<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -420,7 +399,7 @@ sha256_block_data_order: eor r3,r3,r9 @ Maj(a,b,c) add r7,r7,r0,ror#2 @ h+=Sigma0(a) @ add r7,r7,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 5 # if 5==15 str r1,[sp,#17*4] @ make room for r1 @@ -461,7 +440,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 5<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -478,7 +457,7 @@ sha256_block_data_order: eor r12,r12,r8 @ Maj(a,b,c) add r6,r6,r0,ror#2 @ h+=Sigma0(a) @ add r6,r6,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 6 # if 6==15 str r1,[sp,#17*4] @ make room for r1 @@ -519,7 +498,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 6<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -536,7 +515,7 @@ sha256_block_data_order: eor r3,r3,r7 @ Maj(a,b,c) add r5,r5,r0,ror#2 @ h+=Sigma0(a) @ add r5,r5,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 7 # if 7==15 str r1,[sp,#17*4] @ make room for r1 @@ -577,7 +556,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 7<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -594,7 +573,7 @@ sha256_block_data_order: eor r12,r12,r6 @ Maj(a,b,c) add r4,r4,r0,ror#2 @ h+=Sigma0(a) @ add r4,r4,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 8 # if 8==15 str r1,[sp,#17*4] @ make room for r1 @@ -635,7 +614,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 8<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -652,7 +631,7 @@ sha256_block_data_order: eor r3,r3,r5 @ Maj(a,b,c) add r11,r11,r0,ror#2 @ h+=Sigma0(a) @ add r11,r11,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 9 # if 9==15 str r1,[sp,#17*4] @ make room for r1 @@ -693,7 +672,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 9<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -710,7 +689,7 @@ sha256_block_data_order: eor r12,r12,r4 @ Maj(a,b,c) add r10,r10,r0,ror#2 @ h+=Sigma0(a) @ add r10,r10,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 10 # if 10==15 str r1,[sp,#17*4] @ make room for r1 @@ -751,7 +730,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 10<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -768,7 +747,7 @@ sha256_block_data_order: eor r3,r3,r11 @ Maj(a,b,c) add r9,r9,r0,ror#2 @ h+=Sigma0(a) @ add r9,r9,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 11 # if 11==15 str r1,[sp,#17*4] @ make room for r1 @@ -809,7 +788,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 11<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -826,7 +805,7 @@ sha256_block_data_order: eor r12,r12,r10 @ Maj(a,b,c) add r8,r8,r0,ror#2 @ h+=Sigma0(a) @ add r8,r8,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 12 # if 12==15 str r1,[sp,#17*4] @ make room for r1 @@ -867,7 +846,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 12<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -884,7 +863,7 @@ sha256_block_data_order: eor r3,r3,r9 @ Maj(a,b,c) add r7,r7,r0,ror#2 @ h+=Sigma0(a) @ add r7,r7,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 13 # if 13==15 str r1,[sp,#17*4] @ make room for r1 @@ -925,7 +904,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 13<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -942,7 +921,7 @@ sha256_block_data_order: eor r12,r12,r8 @ Maj(a,b,c) add r6,r6,r0,ror#2 @ h+=Sigma0(a) @ add r6,r6,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 14 # if 14==15 str r1,[sp,#17*4] @ make room for r1 @@ -983,7 +962,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 14<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1000,7 +979,7 @@ sha256_block_data_order: eor r3,r3,r7 @ Maj(a,b,c) add r5,r5,r0,ror#2 @ h+=Sigma0(a) @ add r5,r5,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 @ ldr r2,[r1],#4 @ 15 # if 15==15 str r1,[sp,#17*4] @ make room for r1 @@ -1041,7 +1020,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 15<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1091,7 +1070,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 16<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1140,7 +1119,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 17<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1189,7 +1168,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 18<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1238,7 +1217,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 19<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1287,7 +1266,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 20<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1336,7 +1315,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 21<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1385,7 +1364,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 22<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1434,7 +1413,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 23<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1483,7 +1462,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 24<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1532,7 +1511,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 25<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1581,7 +1560,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 26<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1630,7 +1609,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 27<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1679,7 +1658,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 28<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1728,7 +1707,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 29<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1777,7 +1756,7 @@ sha256_block_data_order: cmp r12,#0xf2 @ done? #endif #if 30<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1826,7 +1805,7 @@ sha256_block_data_order: cmp r3,#0xf2 @ done? #endif #if 31<15 -# if __ARM_ARCH__>=7 +# if __ARM_ARCH>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] @@ -1843,7 +1822,7 @@ sha256_block_data_order: eor r12,r12,r6 @ Maj(a,b,c) add r4,r4,r0,ror#2 @ h+=Sigma0(a) @ add r4,r4,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 ite eq @ Thumb2 thing, sanity check in ARM #endif ldreq r3,[sp,#16*4] @ pull ctx @@ -1874,7 +1853,7 @@ sha256_block_data_order: bne .Loop add sp,sp,#19*4 @ destroy frame -#if __ARM_ARCH__>=5 +#if __ARM_ARCH>=5 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc} #else ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr} @@ -1882,7 +1861,7 @@ sha256_block_data_order: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -.size sha256_block_data_order,.-sha256_block_data_order +.size sha256_block_data_order_nohw,.-sha256_block_data_order_nohw #if __ARM_MAX_ARCH__>=7 .arch armv7-a .fpu neon @@ -1893,10 +1872,12 @@ sha256_block_data_order: .align 5 .skip 16 sha256_block_data_order_neon: -.LNEON: stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} sub r11,sp,#16*4+16 + @ In Arm mode, the following ADR runs up against the limits of encodable + @ offsets. It only fits because the offset, when the ADR is placed here, + @ is a multiple of 16. adr r14,K256 bic r11,r11,#15 @ align for 128-bit stores mov r12,sp @@ -2679,12 +2660,27 @@ sha256_block_data_order_neon: # define INST(a,b,c,d) .byte a,b,c,d # endif -.type sha256_block_data_order_armv8,%function +.LK256_shortcut: +@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode. +#if defined(__thumb2__) +.word K256-(.LK256_add+4) +#else +.word K256-(.LK256_add+8) +#endif + +.globl sha256_block_data_order_hw +.hidden sha256_block_data_order_hw +.type sha256_block_data_order_hw,%function .align 5 -sha256_block_data_order_armv8: -.LARMv8: +sha256_block_data_order_hw: + @ K256 is too far to reference from one ADR command in Thumb mode. In + @ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte + @ boundary. For simplicity, just load the offset from .LK256_shortcut. + ldr r3,.LK256_shortcut +.LK256_add: + add r3,pc,r3 + vld1.32 {q0,q1},[r0] - sub r3,r3,#256+32 add r2,r1,r2,lsl#6 @ len to point at the end of inp b .Loop_v8 @@ -2816,13 +2812,9 @@ sha256_block_data_order_armv8: vst1.32 {q0,q1},[r0] bx lr @ bx lr -.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +.size sha256_block_data_order_hw,.-sha256_block_data_order_hw #endif .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 2 .align 2 -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -.comm OPENSSL_armcap_P,4,4 -.hidden OPENSSL_armcap_P -#endif #endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) diff --git a/linux-arm/crypto/fipsmodule/sha512-armv4-linux.S b/linux-arm/crypto/fipsmodule/sha512-armv4-linux.S index 9aed7cb6..55006865 100644 --- a/linux-arm/crypto/fipsmodule/sha512-armv4-linux.S +++ b/linux-arm/crypto/fipsmodule/sha512-armv4-linux.S @@ -63,7 +63,6 @@ # define VFP_ABI_PUSH vstmdb sp!,{d8-d15} # define VFP_ABI_POP vldmia sp!,{d8-d15} #else -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ # define __ARM_MAX_ARCH__ 7 # define VFP_ABI_PUSH # define VFP_ABI_POP @@ -136,36 +135,14 @@ K512: WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .size K512,.-K512 -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-.Lsha512_block_data_order -.skip 32-4 -#else -.skip 32 -#endif -.globl sha512_block_data_order -.hidden sha512_block_data_order -.type sha512_block_data_order,%function -sha512_block_data_order: -.Lsha512_block_data_order: -#if __ARM_ARCH__<7 && !defined(__thumb2__) - sub r3,pc,#8 @ sha512_block_data_order -#else - adr r3,.Lsha512_block_data_order -#endif -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) - ldr r12,.LOPENSSL_armcap - ldr r12,[r3,r12] @ OPENSSL_armcap_P -#ifdef __APPLE__ - ldr r12,[r12] -#endif - tst r12,#ARMV7_NEON - bne .LNEON -#endif +.globl sha512_block_data_order_nohw +.hidden sha512_block_data_order_nohw +.type sha512_block_data_order_nohw,%function +sha512_block_data_order_nohw: add r2,r1,r2,lsl#7 @ len to point at the end of inp stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} - sub r14,r3,#672 @ K512 + adr r14,K512 sub sp,sp,#9*8 ldr r7,[r0,#32+LO] @@ -199,7 +176,7 @@ sha512_block_data_order: str r4,[sp,#40+4] .L00_15: -#if __ARM_ARCH__<7 +#if __ARM_ARCH<7 ldrb r3,[r1,#7] ldrb r9, [r1,#6] ldrb r10, [r1,#5] @@ -276,7 +253,7 @@ sha512_block_data_order: teq r9,#148 ldr r12,[sp,#16+0] @ c.lo -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 it eq @ Thumb2 thing, sanity check in ARM #endif orreq r14,r14,#1 @@ -416,7 +393,7 @@ sha512_block_data_order: teq r9,#23 ldr r12,[sp,#16+0] @ c.lo -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 it eq @ Thumb2 thing, sanity check in ARM #endif orreq r14,r14,#1 @@ -453,7 +430,7 @@ sha512_block_data_order: adc r6,r6,r4 @ h += T tst r14,#1 add r14,r14,#8 -#if __ARM_ARCH__>=7 +#if __ARM_ARCH>=7 ittt eq @ Thumb2 thing, sanity check in ARM #endif ldreq r9,[sp,#184+0] @@ -532,7 +509,7 @@ sha512_block_data_order: bne .Loop add sp,sp,#8*9 @ destroy frame -#if __ARM_ARCH__>=5 +#if __ARM_ARCH>=5 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} #else ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @@ -540,7 +517,7 @@ sha512_block_data_order: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -.size sha512_block_data_order,.-sha512_block_data_order +.size sha512_block_data_order_nohw,.-sha512_block_data_order_nohw #if __ARM_MAX_ARCH__>=7 .arch armv7-a .fpu neon @@ -550,7 +527,6 @@ sha512_block_data_order: .type sha512_block_data_order_neon,%function .align 4 sha512_block_data_order_neon: -.LNEON: dmb @ errata #451034 on early Cortex A8 add r2,r1,r2,lsl#7 @ len to point at the end of inp adr r3,K512 @@ -1876,8 +1852,4 @@ sha512_block_data_order_neon: .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 2 .align 2 -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -.comm OPENSSL_armcap_P,4,4 -.hidden OPENSSL_armcap_P -#endif #endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__) |