aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Frank Barchard <fbarchard@google.com> 2021-03-12 10:37:31 -0800
committer XNNPACK Team <xnnpack-github-robot@google.com> 2021-03-12 10:41:06 -0800
commit bbf51825bc2b96f691121611e8d1d262f76b8010 (patch)
tree 84592c381caa8f04eb8f60e85a6f83543f48fdb4
parent cbb8e705ee30fecbdf12500a4f49643513864d01 (diff)
download XNNPACK-bbf51825bc2b96f691121611e8d1d262f76b8010.tar.gz
Enable QS8 2x8c8-aarch64-neon-mlal-padal GEMM / IGEMM microkernels
PiperOrigin-RevId: 362552666
-rw-r--r-- src/init.c 20
1 files changed, 10 insertions, 10 deletions
diff --git a/src/init.c b/src/init.c
index bd7b130b6..587bdafa8 100644
--- a/src/init.c
+++ b/src/init.c
@@ -816,13 +816,13 @@ static void init(void) {
xnn_params.qs8.gemm.nr = 16;
xnn_params.qs8.gemm.log2_kr = 2;
} else {
- xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
+ xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal);
xnn_params.qs8.gemm.mr = 2;
xnn_params.qs8.gemm.nr = 8;
- xnn_params.qs8.gemm.log2_kr = 1;
+ xnn_params.qs8.gemm.log2_kr = 3;
}
#else // !XNN_ENABLE_ASSEMBLY
if (cpuinfo_has_arm_neon_dot()) {
@@ -862,13 +862,13 @@ static void init(void) {
xnn_params.qs8.gemm.nr = 16;
xnn_params.qs8.gemm.log2_kr = 2;
} else {
- xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
+ xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal);
xnn_params.qs8.gemm.mr = 2;
xnn_params.qs8.gemm.nr = 8;
- xnn_params.qs8.gemm.log2_kr = 1;
+ xnn_params.qs8.gemm.log2_kr = 3;
}
#if XNN_MAX_UARCH_TYPES > 1
{