diff options
author | Frank Barchard <fbarchard@google.com> | 2022-02-03 22:28:09 -0800 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-02-03 22:29:27 -0800 |
commit | 88d06fc82ba0b4c368f76fd049f4888c1706816a (patch) | |
tree | 403564f4a40e7531d90fcbfbf324d50de629d980 | |
parent | ba05c6403e257cdba6af9dcaff572752cf5fe321 (diff) | |
download | XNNPACK-88d06fc82ba0b4c368f76fd049f4888c1706816a.tar.gz |
Disable neondot microkernels on 32-bit iOS
- Select -mcpu=cyclone, which is ARMv8 and supports AArch32.
- Disable dot product source code with #ifndef __APPLE__
PiperOrigin-RevId: 426318242
13 files changed, 40 insertions, 0 deletions
diff --git a/BUILD.bazel b/BUILD.bazel index b27844dd8..33d191ee1 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -8711,6 +8711,10 @@ xnnpack_cc_library( aarch32_srcs = AARCH32_ASM_MICROKERNEL_SRCS, aarch64_copts = ["-march=armv8.2-a+fp16+dotprod"], aarch64_srcs = AARCH64_ASM_MICROKERNEL_SRCS, + apple_aarch32_copts = [ + "-mcpu=cyclone", + "-mtune=generic", + ], wasm_srcs = WASM32_ASM_MICROKERNEL_SRCS, wasmrelaxedsimd_srcs = WASM32_ASM_MICROKERNEL_SRCS, wasmsimd_srcs = WASM32_ASM_MICROKERNEL_SRCS, diff --git a/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S b/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S index 0a7864899..f1ef0fb03 100644 --- a/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S +++ b/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S @@ -48,6 +48,8 @@ // int8_t output_max; d13[7] // } xnn_qs8_minmax_params.neonv8; +// iOS does not support 32 bit ARM with Neon DotProduct. +#ifndef __APPLE__ BEGIN_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55 # Push 80 bytes PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32 @@ -315,6 +317,7 @@ BEGIN_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a5 BX lr END_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits diff --git a/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S b/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S index ad9af11b2..ae0f58014 100644 --- a/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S +++ b/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S @@ -48,6 +48,8 @@ // int8_t output_max; d13[7] // } xnn_qs8_minmax_params.neonv8; +// iOS does not support 32 bit ARM with Neon DotProduct. 
+#ifndef __APPLE__ BEGIN_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64 # Push 80 bytes PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32 @@ -268,6 +270,7 @@ BEGIN_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64 BX lr END_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits diff --git a/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S b/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S index 17bf66564..c3605bdd5 100644 --- a/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S +++ b/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S @@ -50,6 +50,8 @@ // int8_t output_max; d13[7] // } xnn_qs8_minmax_params.neonv8; +// iOS does not support 32 bit ARM with Neon DotProduct. +#ifndef __APPLE__ BEGIN_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55 ADD r2, r2, 3 // kc = (kc + 3) & ~3 BIC r2, r2, 3 @@ -339,6 +341,7 @@ BEGIN_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} END_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits diff --git a/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S b/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S index 9c1d9e310..c6f402513 100644 --- a/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S +++ b/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S @@ -50,6 +50,8 @@ // int8_t output_max; d13[7] // } xnn_qs8_minmax_params.neonv8; +// iOS does not support 32 bit ARM with Neon DotProduct. 
+#ifndef __APPLE__ BEGIN_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64 ADD r2, r2, 3 // kc = (kc + 3) & ~3 BIC r2, r2, 3 @@ -292,6 +294,7 @@ BEGIN_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} END_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits diff --git a/src/qs8-gemm/4x8c4-aarch32-neondot-cortex-a55.S.in b/src/qs8-gemm/4x8c4-aarch32-neondot-cortex-a55.S.in index b3f3b6e77..429772167 100644 --- a/src/qs8-gemm/4x8c4-aarch32-neondot-cortex-a55.S.in +++ b/src/qs8-gemm/4x8c4-aarch32-neondot-cortex-a55.S.in @@ -60,6 +60,8 @@ $else: // int8_t output_max; d13[7] // } xnn_qs8_minmax_params.neonv8; +// iOS does not support 32 bit ARM with Neon DotProduct. +#ifndef __APPLE__ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_cortex_a55 # Push 80 bytes PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32 @@ -363,6 +365,7 @@ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_uke BX lr END_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_cortex_a55 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits diff --git a/src/qs8-gemm/4x8c4-aarch32-neondot-ld64.S.in b/src/qs8-gemm/4x8c4-aarch32-neondot-ld64.S.in index b88cddfca..98342e67d 100644 --- a/src/qs8-gemm/4x8c4-aarch32-neondot-ld64.S.in +++ b/src/qs8-gemm/4x8c4-aarch32-neondot-ld64.S.in @@ -60,6 +60,8 @@ $else: // int8_t output_max; d13[7] // } xnn_qs8_minmax_params.neonv8; +// iOS does not support 32 bit ARM with Neon DotProduct. 
+#ifndef __APPLE__ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_ld64 # Push 80 bytes PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32 @@ -316,6 +318,7 @@ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_uke BX lr END_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_ld64 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits diff --git a/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S b/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S index 8629e37c2..504cbab79 100644 --- a/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S +++ b/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S @@ -51,6 +51,8 @@ // int8_t output_max; d13[7] // } rndnu_neon; +// iOS does not support 32 bit ARM with Neon DotProduct. +#ifndef __APPLE__ BEGIN_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55 # Push 80 bytes PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32 @@ -320,6 +322,7 @@ BEGIN_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a BX lr END_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits diff --git a/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S b/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S index 94bad0879..9198a9d90 100644 --- a/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S +++ b/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S @@ -51,6 +51,8 @@ // int8_t output_max; d13[7] // } rndnu_neon; +// iOS does not support 32 bit ARM with Neon DotProduct. 
+#ifndef __APPLE__ BEGIN_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64 # Push 80 bytes PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32 @@ -273,6 +275,7 @@ BEGIN_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64 BX lr END_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits diff --git a/src/qs8-igemm/4x8c4-aarch32-neondot-cortex-a55.S.in b/src/qs8-igemm/4x8c4-aarch32-neondot-cortex-a55.S.in index 58f2ab37d..f6470f05d 100644 --- a/src/qs8-igemm/4x8c4-aarch32-neondot-cortex-a55.S.in +++ b/src/qs8-igemm/4x8c4-aarch32-neondot-cortex-a55.S.in @@ -62,6 +62,8 @@ $else: // int8_t output_max; d13[7] // } xnn_qs8_minmax_params.neonv8; +// iOS does not support 32 bit ARM with Neon DotProduct. +#ifndef __APPLE__ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_cortex_a55 ADD r2, r2, 3 // kc = (kc + 3) & ~3 BIC r2, r2, 3 @@ -387,6 +389,7 @@ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_uk POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} END_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_cortex_a55 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits diff --git a/src/qs8-igemm/4x8c4-aarch32-neondot-ld64.S.in b/src/qs8-igemm/4x8c4-aarch32-neondot-ld64.S.in index 5361caf6b..1889e1a9f 100644 --- a/src/qs8-igemm/4x8c4-aarch32-neondot-ld64.S.in +++ b/src/qs8-igemm/4x8c4-aarch32-neondot-ld64.S.in @@ -62,6 +62,8 @@ $else: // int8_t output_max; d13[7] // } xnn_qs8_minmax_params.neonv8; +// iOS does not support 32 bit ARM with Neon DotProduct. 
+#ifndef __APPLE__ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_ld64 ADD r2, r2, 3 // kc = (kc + 3) & ~3 BIC r2, r2, 3 @@ -340,6 +342,7 @@ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_uk POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} END_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_ld64 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits diff --git a/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S b/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S index 9de362cf6..a0f12af78 100644 --- a/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S +++ b/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S @@ -53,6 +53,8 @@ // int8_t output_max; d13[7] // } rndnu_neon; +// iOS does not support 32 bit ARM with Neon DotProduct. +#ifndef __APPLE__ BEGIN_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55 ADD r2, r2, 3 // kc = (kc + 3) & ~3 BIC r2, r2, 3 @@ -344,6 +346,7 @@ BEGIN_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_ POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} END_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits diff --git a/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S b/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S index a28f00e7d..d6e06bf9e 100644 --- a/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S +++ b/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S @@ -53,6 +53,8 @@ // int8_t output_max; d13[7] // } rndnu_neon; +// iOS does not support 32 bit ARM with Neon DotProduct. 
+#ifndef __APPLE__ BEGIN_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64 ADD r2, r2, 3 // kc = (kc + 3) & ~3 BIC r2, r2, 3 @@ -297,6 +299,7 @@ BEGIN_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} END_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64 +#endif // __APPLE__ #ifdef __ELF__ .section ".note.GNU-stack","",%progbits |