about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Frank Barchard <fbarchard@google.com> 2022-02-03 22:28:09 -0800
committer: XNNPACK Team <xnnpack-github-robot@google.com> 2022-02-03 22:29:27 -0800
commit: 88d06fc82ba0b4c368f76fd049f4888c1706816a (patch)
tree: 403564f4a40e7531d90fcbfbf324d50de629d980
parent: ba05c6403e257cdba6af9dcaff572752cf5fe321 (diff)
download: XNNPACK-88d06fc82ba0b4c368f76fd049f4888c1706816a.tar.gz
Disable neondot microkernels on iOS 32 bit
- Select -mcpu=cyclone, which is ARMv8 and supports AArch32.
- Disable the dot product source code with #ifndef __APPLE__.

PiperOrigin-RevId: 426318242
-rw-r--r--  BUILD.bazel  4
-rw-r--r--  src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S  3
-rw-r--r--  src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S  3
-rw-r--r--  src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S  3
-rw-r--r--  src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S  3
-rw-r--r--  src/qs8-gemm/4x8c4-aarch32-neondot-cortex-a55.S.in  3
-rw-r--r--  src/qs8-gemm/4x8c4-aarch32-neondot-ld64.S.in  3
-rw-r--r--  src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S  3
-rw-r--r--  src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S  3
-rw-r--r--  src/qs8-igemm/4x8c4-aarch32-neondot-cortex-a55.S.in  3
-rw-r--r--  src/qs8-igemm/4x8c4-aarch32-neondot-ld64.S.in  3
-rw-r--r--  src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S  3
-rw-r--r--  src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S  3
13 files changed, 40 insertions, 0 deletions
diff --git a/BUILD.bazel b/BUILD.bazel
index b27844dd8..33d191ee1 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -8711,6 +8711,10 @@ xnnpack_cc_library(
aarch32_srcs = AARCH32_ASM_MICROKERNEL_SRCS,
aarch64_copts = ["-march=armv8.2-a+fp16+dotprod"],
aarch64_srcs = AARCH64_ASM_MICROKERNEL_SRCS,
+ apple_aarch32_copts = [
+ "-mcpu=cyclone",
+ "-mtune=generic",
+ ],
wasm_srcs = WASM32_ASM_MICROKERNEL_SRCS,
wasmrelaxedsimd_srcs = WASM32_ASM_MICROKERNEL_SRCS,
wasmsimd_srcs = WASM32_ASM_MICROKERNEL_SRCS,
diff --git a/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S b/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S
index 0a7864899..f1ef0fb03 100644
--- a/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S
+++ b/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S
@@ -48,6 +48,8 @@
// int8_t output_max; d13[7]
// } xnn_qs8_minmax_params.neonv8;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55
# Push 80 bytes
PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32
@@ -315,6 +317,7 @@ BEGIN_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a5
BX lr
END_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
diff --git a/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S b/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S
index ad9af11b2..ae0f58014 100644
--- a/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S
+++ b/src/qc8-gemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S
@@ -48,6 +48,8 @@
// int8_t output_max; d13[7]
// } xnn_qs8_minmax_params.neonv8;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64
# Push 80 bytes
PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32
@@ -268,6 +270,7 @@ BEGIN_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64
BX lr
END_FUNCTION xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
diff --git a/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S b/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S
index 17bf66564..c3605bdd5 100644
--- a/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S
+++ b/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-cortex-a55.S
@@ -50,6 +50,8 @@
// int8_t output_max; d13[7]
// } xnn_qs8_minmax_params.neonv8;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55
ADD r2, r2, 3 // kc = (kc + 3) & ~3
BIC r2, r2, 3
@@ -339,6 +341,7 @@ BEGIN_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
END_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
diff --git a/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S b/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S
index 9c1d9e310..c6f402513 100644
--- a/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S
+++ b/src/qc8-igemm/gen/4x8c4-minmax-fp32-aarch32-neondot-ld64.S
@@ -50,6 +50,8 @@
// int8_t output_max; d13[7]
// } xnn_qs8_minmax_params.neonv8;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64
ADD r2, r2, 3 // kc = (kc + 3) & ~3
BIC r2, r2, 3
@@ -292,6 +294,7 @@ BEGIN_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
END_FUNCTION xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/4x8c4-aarch32-neondot-cortex-a55.S.in b/src/qs8-gemm/4x8c4-aarch32-neondot-cortex-a55.S.in
index b3f3b6e77..429772167 100644
--- a/src/qs8-gemm/4x8c4-aarch32-neondot-cortex-a55.S.in
+++ b/src/qs8-gemm/4x8c4-aarch32-neondot-cortex-a55.S.in
@@ -60,6 +60,8 @@ $else:
// int8_t output_max; d13[7]
// } xnn_qs8_minmax_params.neonv8;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_cortex_a55
# Push 80 bytes
PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32
@@ -363,6 +365,7 @@ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_uke
BX lr
END_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_cortex_a55
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/4x8c4-aarch32-neondot-ld64.S.in b/src/qs8-gemm/4x8c4-aarch32-neondot-ld64.S.in
index b88cddfca..98342e67d 100644
--- a/src/qs8-gemm/4x8c4-aarch32-neondot-ld64.S.in
+++ b/src/qs8-gemm/4x8c4-aarch32-neondot-ld64.S.in
@@ -60,6 +60,8 @@ $else:
// int8_t output_max; d13[7]
// } xnn_qs8_minmax_params.neonv8;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_ld64
# Push 80 bytes
PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32
@@ -316,6 +318,7 @@ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_uke
BX lr
END_FUNCTION xnn_${DATATYPE.lower()}_gemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_ld64
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S b/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S
index 8629e37c2..504cbab79 100644
--- a/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S
+++ b/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S
@@ -51,6 +51,8 @@
// int8_t output_max; d13[7]
// } rndnu_neon;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55
# Push 80 bytes
PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32
@@ -320,6 +322,7 @@ BEGIN_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a
BX lr
END_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S b/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S
index 94bad0879..9198a9d90 100644
--- a/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S
+++ b/src/qs8-gemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S
@@ -51,6 +51,8 @@
// int8_t output_max; d13[7]
// } rndnu_neon;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64
# Push 80 bytes
PUSH {r4, r5, r6, r7, r8, r9, r10, r11} // 32
@@ -273,6 +275,7 @@ BEGIN_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64
BX lr
END_FUNCTION xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/4x8c4-aarch32-neondot-cortex-a55.S.in b/src/qs8-igemm/4x8c4-aarch32-neondot-cortex-a55.S.in
index 58f2ab37d..f6470f05d 100644
--- a/src/qs8-igemm/4x8c4-aarch32-neondot-cortex-a55.S.in
+++ b/src/qs8-igemm/4x8c4-aarch32-neondot-cortex-a55.S.in
@@ -62,6 +62,8 @@ $else:
// int8_t output_max; d13[7]
// } xnn_qs8_minmax_params.neonv8;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_cortex_a55
ADD r2, r2, 3 // kc = (kc + 3) & ~3
BIC r2, r2, 3
@@ -387,6 +389,7 @@ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_uk
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
END_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_cortex_a55
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/4x8c4-aarch32-neondot-ld64.S.in b/src/qs8-igemm/4x8c4-aarch32-neondot-ld64.S.in
index 5361caf6b..1889e1a9f 100644
--- a/src/qs8-igemm/4x8c4-aarch32-neondot-ld64.S.in
+++ b/src/qs8-igemm/4x8c4-aarch32-neondot-ld64.S.in
@@ -62,6 +62,8 @@ $else:
// int8_t output_max; d13[7]
// } xnn_qs8_minmax_params.neonv8;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_ld64
ADD r2, r2, 3 // kc = (kc + 3) & ~3
BIC r2, r2, 3
@@ -340,6 +342,7 @@ BEGIN_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_uk
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
END_FUNCTION xnn_${DATATYPE.lower()}_igemm_minmax_${REQUANTIZATION.lower()}_ukernel_4x8c4__aarch32_neondot_ld64
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S b/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S
index 9de362cf6..a0f12af78 100644
--- a/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S
+++ b/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-cortex-a55.S
@@ -53,6 +53,8 @@
// int8_t output_max; d13[7]
// } rndnu_neon;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55
ADD r2, r2, 3 // kc = (kc + 3) & ~3
BIC r2, r2, 3
@@ -344,6 +346,7 @@ BEGIN_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
END_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S b/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S
index a28f00e7d..d6e06bf9e 100644
--- a/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S
+++ b/src/qs8-igemm/gen/4x8c4-minmax-rndnu-aarch32-neondot-ld64.S
@@ -53,6 +53,8 @@
// int8_t output_max; d13[7]
// } rndnu_neon;
+// iOS does not support 32 bit ARM with Neon DotProduct.
+#ifndef __APPLE__
BEGIN_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64
ADD r2, r2, 3 // kc = (kc + 3) & ~3
BIC r2, r2, 3
@@ -297,6 +299,7 @@ BEGIN_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
END_FUNCTION xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64
+#endif // __APPLE__
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits