diff options
author | Frank Barchard <fbarchard@google.com> | 2021-03-12 10:37:31 -0800 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2021-03-12 10:41:06 -0800 |
commit | bbf51825bc2b96f691121611e8d1d262f76b8010 (patch) | |
tree | 84592c381caa8f04eb8f60e85a6f83543f48fdb4 | |
parent | cbb8e705ee30fecbdf12500a4f49643513864d01 (diff) | |
download | XNNPACK-bbf51825bc2b96f691121611e8d1d262f76b8010.tar.gz |
Enable QS8 2x8c8-aarch64-neon-mlal-padal GEMM / IGEMM microkernels
PiperOrigin-RevId: 362552666
-rw-r--r-- | src/init.c | 20 |
1 file changed, 10 insertions, 10 deletions
```diff
diff --git a/src/init.c b/src/init.c
index bd7b130b6..587bdafa8 100644
--- a/src/init.c
+++ b/src/init.c
@@ -816,13 +816,13 @@ static void init(void) {
  xnn_params.qs8.gemm.nr = 16;
  xnn_params.qs8.gemm.log2_kr = 2;
  } else {
- xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
+ xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal);
  xnn_params.qs8.gemm.mr = 2;
  xnn_params.qs8.gemm.nr = 8;
- xnn_params.qs8.gemm.log2_kr = 1;
+ xnn_params.qs8.gemm.log2_kr = 3;
  }
  #else // !XNN_ENABLE_ASSEMBLY
  if (cpuinfo_has_arm_neon_dot()) {
@@ -862,13 +862,13 @@ static void init(void) {
  xnn_params.qs8.gemm.nr = 16;
  xnn_params.qs8.gemm.log2_kr = 2;
  } else {
- xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
- xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
+ xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal);
+ xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal);
  xnn_params.qs8.gemm.mr = 2;
  xnn_params.qs8.gemm.nr = 8;
- xnn_params.qs8.gemm.log2_kr = 1;
+ xnn_params.qs8.gemm.log2_kr = 3;
  }
  #if XNN_MAX_UARCH_TYPES > 1
  {
```