aboutsummaryrefslogtreecommitdiff
path: root/src/xnnpack
diff options
context:
space:
mode:
author Marat Dukhan <maratek@google.com> 2022-07-27 21:14:38 -0700
committer XNNPACK Team <xnnpack-github-robot@google.com> 2022-07-27 21:15:38 -0700
commit c836505ed4498a2ebd1c21050c383a0a60a8defc (patch)
tree b26a80a0c5d6581794cc953414d7e05df7653ac9 /src/xnnpack
parent 917e63588c2664a12417beb01e59f9e4a10251bc (diff)
download XNNPACK-c836505ed4498a2ebd1c21050c383a0a60a8defc.tar.gz
Refactor declarations of microkernel parameters
- Extract declarations of microkernel parameters into microparams.h
- Group and document microkernel parameters
- Rename params-init accordingly
- Make microkernels depend only on microparams.h and not params.h

PiperOrigin-RevId: 463747649
Diffstat (limited to 'src/xnnpack')
-rw-r--r-- src/xnnpack/argmaxpool.h 1
-rw-r--r-- src/xnnpack/avgpool.h 2
-rw-r--r-- src/xnnpack/conv.h 2
-rw-r--r-- src/xnnpack/depthtospace.h 1
-rw-r--r-- src/xnnpack/dwconv.h 2
-rw-r--r-- src/xnnpack/fill.h 1
-rw-r--r-- src/xnnpack/gavgpool.h 2
-rw-r--r-- src/xnnpack/gemm.h 4
-rw-r--r-- src/xnnpack/ibilinear.h 1
-rw-r--r-- src/xnnpack/igemm.h 4
-rw-r--r-- src/xnnpack/lut.h 1
-rw-r--r-- src/xnnpack/maxpool.h 2
-rw-r--r-- src/xnnpack/microparams-init.h (renamed from src/xnnpack/params-init.h) 0
-rw-r--r-- src/xnnpack/microparams.h 2481
-rw-r--r-- src/xnnpack/pad.h 1
-rw-r--r-- src/xnnpack/params.h 2416
-rw-r--r-- src/xnnpack/pavgpool.h 2
-rw-r--r-- src/xnnpack/ppmm.h 2
-rw-r--r-- src/xnnpack/prelu.h 1
-rw-r--r-- src/xnnpack/raddexpminusmax.h 1
-rw-r--r-- src/xnnpack/raddextexp.h 1
-rw-r--r-- src/xnnpack/raddstoreexpminusmax.h 2
-rw-r--r-- src/xnnpack/requantization-stubs.h 2
-rw-r--r-- src/xnnpack/requantization.h 4
-rw-r--r-- src/xnnpack/rmaxabs.h 1
-rw-r--r-- src/xnnpack/spmm.h 2
-rw-r--r-- src/xnnpack/transpose.h 2
-rw-r--r-- src/xnnpack/unpool.h 1
-rw-r--r-- src/xnnpack/vaddsub.h 2
-rw-r--r-- src/xnnpack/vbinary.h 2
-rw-r--r-- src/xnnpack/vcvt.h 2
-rw-r--r-- src/xnnpack/vlrelu.h 2
-rw-r--r-- src/xnnpack/vlshift.h 1
-rw-r--r-- src/xnnpack/vmul.h 2
-rw-r--r-- src/xnnpack/vmulcaddc.h 2
-rw-r--r-- src/xnnpack/vscaleexpminusmax.h 1
-rw-r--r-- src/xnnpack/vscaleextexp.h 1
-rw-r--r-- src/xnnpack/vunary.h 2
-rw-r--r-- src/xnnpack/window.h 1
-rw-r--r-- src/xnnpack/zip.h 1
40 files changed, 2506 insertions, 2455 deletions
diff --git a/src/xnnpack/argmaxpool.h b/src/xnnpack/argmaxpool.h
index 3366f0c1a..c900ce1aa 100644
--- a/src/xnnpack/argmaxpool.h
+++ b/src/xnnpack/argmaxpool.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/avgpool.h b/src/xnnpack/avgpool.h
index d766ac7b4..366986b79 100644
--- a/src/xnnpack/avgpool.h
+++ b/src/xnnpack/avgpool.h
@@ -11,8 +11,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/conv.h b/src/xnnpack/conv.h
index 0b02beeaf..02a713417 100644
--- a/src/xnnpack/conv.h
+++ b/src/xnnpack/conv.h
@@ -11,8 +11,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/depthtospace.h b/src/xnnpack/depthtospace.h
index 285fd3cbe..358b9df4c 100644
--- a/src/xnnpack/depthtospace.h
+++ b/src/xnnpack/depthtospace.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/dwconv.h b/src/xnnpack/dwconv.h
index 4b1464e1e..83ef6e13d 100644
--- a/src/xnnpack/dwconv.h
+++ b/src/xnnpack/dwconv.h
@@ -11,8 +11,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/fill.h b/src/xnnpack/fill.h
index 0b62fea94..97cfd5007 100644
--- a/src/xnnpack/fill.h
+++ b/src/xnnpack/fill.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/gavgpool.h b/src/xnnpack/gavgpool.h
index fc5b153a4..bec595df8 100644
--- a/src/xnnpack/gavgpool.h
+++ b/src/xnnpack/gavgpool.h
@@ -11,8 +11,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/gemm.h b/src/xnnpack/gemm.h
index 519f51358..543ebfb10 100644
--- a/src/xnnpack/gemm.h
+++ b/src/xnnpack/gemm.h
@@ -11,8 +11,10 @@
#include <stddef.h>
#include <stdint.h>
+#include <xnnpack.h> // For xnn_status
+
#include <xnnpack/common.h>
-#include <xnnpack/params.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/ibilinear.h b/src/xnnpack/ibilinear.h
index 12ecb605c..2744e404b 100644
--- a/src/xnnpack/ibilinear.h
+++ b/src/xnnpack/ibilinear.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/igemm.h b/src/xnnpack/igemm.h
index 2ddd739f1..a79c3936d 100644
--- a/src/xnnpack/igemm.h
+++ b/src/xnnpack/igemm.h
@@ -11,8 +11,10 @@
#include <stddef.h>
#include <stdint.h>
+#include <xnnpack.h> // For xnn_status
+
#include <xnnpack/common.h>
-#include <xnnpack/params.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/lut.h b/src/xnnpack/lut.h
index f11954e01..57d36412b 100644
--- a/src/xnnpack/lut.h
+++ b/src/xnnpack/lut.h
@@ -11,7 +11,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/maxpool.h b/src/xnnpack/maxpool.h
index 0310e77b7..a47c62531 100644
--- a/src/xnnpack/maxpool.h
+++ b/src/xnnpack/maxpool.h
@@ -11,8 +11,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/params-init.h b/src/xnnpack/microparams-init.h
index 3e5aa121d..3e5aa121d 100644
--- a/src/xnnpack/params-init.h
+++ b/src/xnnpack/microparams-init.h
diff --git a/src/xnnpack/microparams.h b/src/xnnpack/microparams.h
new file mode 100644
index 000000000..9c6c3bb41
--- /dev/null
+++ b/src/xnnpack/microparams.h
@@ -0,0 +1,2481 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <xnnpack/common.h>
+
+
+// Default: serves to differentiate pointer types for micro-kernels without fused activation.
+
+union xnn_f16_default_params { // Tag type for F16 microkernels without fused activation; it has no real fields.
+ char _; // Dummy member variable to comply with the C standard
+};
+
+union xnn_f32_default_params { // Tag type for F32 microkernels without fused activation; only the x86 build adds real data.
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Present only when building for x86 / x86-64.
+ int32_t mask_table[14]; // NOTE(review): presumably a table of lane masks for remainder elements — confirm against the init code.
+ } avx;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+
+// ReLU: serves to differentiate pointer types for micro-kernels with fused ReLU activation.
+
+union xnn_f32_relu_params { // Tag type for F32 microkernels with fused ReLU; it has no real fields.
+ char _; // Dummy member variable to comply with the C standard
+};
+
+
+// Scale+Min+Max: used by AVGPOOL/GAVGPOOL microkernels.
+
+union xnn_f16_scaleminmax_params { // Scale plus lower/upper clamp bounds for F16 AVGPOOL/GAVGPOOL microkernels; one layout per target.
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // One 16-bit value per field. NOTE(review): presumably raw half-precision bit patterns — confirm.
+ uint16_t scale;
+ uint16_t min;
+ uint16_t max;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Values are stored as float in this layout: eight elements per field, each tagged XNN_ALIGN(32).
+ XNN_ALIGN(32) float scale[8];
+ XNN_ALIGN(32) float min[8];
+ XNN_ALIGN(32) float max[8];
+ } avx;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+union xnn_f32_scaleminmax_params { // Scale plus lower/upper clamp bounds for F32 AVGPOOL/GAVGPOOL microkernels.
+ struct { // Always available: one float per field.
+ float scale;
+ float min;
+ float max;
+ } scalar;
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Four floats per field, each tagged XNN_ALIGN(16). NOTE(review): presumably the same value in every element — confirm.
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) float min[4];
+ XNN_ALIGN(16) float max[4];
+ } sse;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+
+// Min+Max: used by VCLAMP and GEMM/IGEMM/DWCONV/MAXPOOL/etc with MINMAX activation.
+
+union xnn_f16_minmax_params { // Lower/upper clamp bounds for F16 VCLAMP and MINMAX-activated microkernels.
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // One 16-bit value per bound. NOTE(review): presumably raw half-precision bit patterns — confirm.
+ uint16_t min;
+ uint16_t max;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Bounds are stored as float in this layout: eight elements each, tagged XNN_ALIGN(32).
+ XNN_ALIGN(32) float min[8];
+ XNN_ALIGN(32) float max[8];
+ } avx;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+union xnn_f32_minmax_params { // Lower/upper clamp bounds for F32 VCLAMP and MINMAX-activated microkernels.
+ struct { // Always available: one float per bound.
+ float min;
+ float max;
+ } scalar;
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Four floats per bound, tagged XNN_ALIGN(16).
+ XNN_ALIGN(16) float min[4];
+ XNN_ALIGN(16) float max[4];
+ } sse;
+ struct { // Eight floats per bound, tagged XNN_ALIGN(32), followed by an untagged mask table.
+ XNN_ALIGN(32) float min[8];
+ XNN_ALIGN(32) float max[8];
+ int32_t mask_table[14]; // NOTE(review): presumably lane masks for remainder elements — confirm against the init code.
+ } avx;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Two floats per bound, tagged XNN_ALIGN(8).
+ XNN_ALIGN(8) float min[2];
+ XNN_ALIGN(8) float max[2];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_s8_minmax_params { // Lower/upper clamp bounds for signed 8-bit MINMAX microkernels.
+ struct { // Always available; bounds are held in 32-bit integers here.
+ int32_t min;
+ int32_t max;
+ } scalar;
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Unsigned bytes with a bias. NOTE(review): presumably remaps signed values so unsigned byte min/max can be used — confirm.
+ XNN_ALIGN(16) uint8_t bias[16];
+ XNN_ALIGN(16) uint8_t min_with_bias[16];
+ XNN_ALIGN(16) uint8_t max_with_bias[16];
+ } sse2;
+ struct { // Sixteen signed bytes per bound, no bias field.
+ XNN_ALIGN(16) int8_t min[16];
+ XNN_ALIGN(16) int8_t max[16];
+ } sse4;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // One signed byte per bound.
+ int8_t min;
+ int8_t max;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Eight signed bytes per bound, tagged XNN_ALIGN(8).
+ XNN_ALIGN(8) int8_t min[8];
+ XNN_ALIGN(8) int8_t max[8];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_u8_minmax_params { // Lower/upper clamp bounds for unsigned 8-bit MINMAX microkernels.
+ struct { // Always available; bounds are held in 32-bit unsigned integers here.
+ uint32_t min;
+ uint32_t max;
+ } scalar;
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Sixteen unsigned bytes per bound, tagged XNN_ALIGN(16).
+ XNN_ALIGN(16) uint8_t min[16];
+ XNN_ALIGN(16) uint8_t max[16];
+ } sse2;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // One unsigned byte per bound.
+ uint8_t min;
+ uint8_t max;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Eight unsigned bytes per bound, tagged XNN_ALIGN(8).
+ XNN_ALIGN(8) uint8_t min[8];
+ XNN_ALIGN(8) uint8_t max[8];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// Conv Min+Max: used by quantized GEMM/IGEMM/DWCONV microkernels with MINMAX activation.
+
+union xnn_qs8_minmax_params { // Output-stage parameters for quantized GEMM/IGEMM/DWCONV microkernels with MINMAX activation; no variant here holds a scale field. NOTE(review): presumably the scale is supplied separately — confirm.
+ struct { // Float magic bias with integer magic_min/magic_max bounds.
+ float magic_bias;
+ int32_t magic_min;
+ int32_t magic_max;
+ int32_t magic_bias_less_zero_point;
+ } scalar_imagic;
+ struct { // Float bounds expressed relative to the zero point, plus magic-bias constants.
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ } scalar_fmagic;
+ struct { // Float bounds relative to the zero point; the zero point is stored separately and there are no magic-bias fields.
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ int32_t output_zero_point;
+ } scalar_lrintf;
+#if XNN_ARCH_ARM
+ struct { // 32-bit ARM only (XNN_ARCH_ARM); output bounds are held in 32-bit words.
+ float magic_bias;
+ int32_t magic_bias_less_zero_point;
+ uint32_t output_min;
+ uint32_t output_max;
+ } armv6simd;
+#endif // XNN_ARCH_ARM
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Magic-bias constants with signed-byte output bounds.
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } neon;
+ struct { // No magic-bias fields. NOTE(review): bounds are uint8_t here but int8_t in the neon layout above — confirm this is intended.
+ int16_t output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } neonv8;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Upper bound is a float relative to the zero point; lower bound is 16-bit. There is no separate output_max field.
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int16_t output_min[8];
+ } sse2;
+ struct { // Same fields as sse2 except the lower bound is sixteen signed bytes.
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int8_t output_min[16];
+ } sse4;
+ struct { // Same fields as sse4 with doubled element counts and XNN_ALIGN(32).
+ XNN_ALIGN(32) float output_max_less_zero_point[8];
+ XNN_ALIGN(32) int16_t output_zero_point[16];
+ XNN_ALIGN(32) int8_t output_min[32];
+ } avx2;
+ struct { // Same fields as avx2 with doubled element counts and XNN_ALIGN(64).
+ XNN_ALIGN(64) float output_max_less_zero_point[16];
+ XNN_ALIGN(64) int16_t output_zero_point[32];
+ XNN_ALIGN(64) int8_t output_min[64];
+ } avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Lower bound is stored as magic_min and the upper bound as output_max bytes; there is no magic_max or output_min field.
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) int32_t magic_min[2];
+ XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+ XNN_ALIGN(8) int8_t output_max[8];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_qs8_conv_minmax_params { // Output-stage parameters for quantized GEMM/IGEMM/DWCONV microkernels with MINMAX activation; fp32_* variants carry a float scale, rndnu_neon carries integer shift/multiplier fields instead.
+ struct { // Scale, float bounds relative to the zero point, and magic-bias constants.
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ } fp32_scalar_fmagic;
+ struct { // Scale, magic bias, and integer magic_min/magic_max bounds.
+ float scale;
+ float magic_bias;
+ int32_t magic_min;
+ int32_t magic_max;
+ int32_t magic_bias_less_zero_point;
+ } fp32_scalar_imagic;
+ struct { // Scale and float bounds relative to the zero point; the zero point is stored separately, with no magic-bias fields.
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ int32_t output_zero_point;
+ } fp32_scalar_lrintf;
+#if XNN_ARCH_ARM
+ struct { // 32-bit ARM only (XNN_ARCH_ARM); output bounds are held in 32-bit words.
+ float scale;
+ float magic_bias;
+ int32_t magic_bias_less_zero_point;
+ uint32_t output_min;
+ uint32_t output_max;
+ } fp32_armv6simd;
+#endif // XNN_ARCH_ARM
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Scale and magic-bias constants with signed-byte output bounds.
+ float scale;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } fp32_neon;
+ struct { // Scale and a 16-bit zero point; no magic-bias fields.
+ float scale;
+ int16_t output_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } fp32_neonv8;
+ struct { // Integer-only layout: pre-shift, multiplier and post-shift take the place of a float scale.
+ int32_t right_pre_shift;
+ int32_t multiplier;
+ int32_t right_post_shift;
+ int16_t output_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } rndnu_neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Upper bound is a float relative to the zero point; lower bound is 16-bit. There is no separate output_max field.
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int16_t output_min[8];
+ } fp32_sse2;
+ struct { // Same fields as fp32_sse2 except the lower bound is sixteen signed bytes.
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int8_t output_min[16];
+ } fp32_sse4;
+ struct { // Same fields as fp32_sse4 with doubled element counts and XNN_ALIGN(32).
+ XNN_ALIGN(32) float scale[8];
+ XNN_ALIGN(32) float output_max_less_zero_point[8];
+ XNN_ALIGN(32) int16_t output_zero_point[16];
+ XNN_ALIGN(32) int8_t output_min[32];
+ } fp32_avx2;
+ struct { // Same fields as fp32_avx2 with doubled element counts and XNN_ALIGN(64).
+ XNN_ALIGN(64) float scale[16];
+ XNN_ALIGN(64) float output_max_less_zero_point[16];
+ XNN_ALIGN(64) int16_t output_zero_point[32];
+ XNN_ALIGN(64) int8_t output_min[64];
+ } fp32_avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Lower bound is stored as magic_min and the upper bound as output_max bytes; there is no magic_max or output_min field.
+ XNN_ALIGN(8) float scale[2];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) int32_t magic_min[2];
+ XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+ XNN_ALIGN(8) int8_t output_max[8];
+ } fp32_wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_qu8_conv_minmax_params { // Output-stage parameters for unsigned quantized GEMM/IGEMM/DWCONV microkernels with MINMAX activation; every variant also stores the kernel zero point in some form.
+ struct { // Kernel zero point, scale, float bounds relative to the zero point, and magic-bias constants.
+ int32_t kernel_zero_point;
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ } fp32_scalar_fmagic;
+ struct { // Kernel zero point, scale, magic bias, and integer magic_min/magic_max bounds.
+ int32_t kernel_zero_point;
+ float scale;
+ float magic_bias;
+ int32_t magic_min;
+ int32_t magic_max;
+ int32_t magic_bias_less_zero_point;
+ } fp32_scalar_imagic;
+ struct { // Kernel zero point, scale, and float bounds; the output zero point is stored separately, with no magic-bias fields.
+ int32_t kernel_zero_point;
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ int32_t output_zero_point;
+ } fp32_scalar_lrintf;
+#if XNN_ARCH_ARM
+ struct { // 32-bit ARM only (XNN_ARCH_ARM); the kernel zero point field is named minus_kernel_zero_point here. NOTE(review): presumably stored negated — confirm.
+ float scale;
+ float magic_bias;
+ uint32_t minus_kernel_zero_point;
+ int32_t magic_bias_less_zero_point;
+ uint32_t output_min;
+ uint32_t output_max;
+ } fp32_armv6simd;
+#endif // XNN_ARCH_ARM
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Kernel zero point as four bytes, then scale and magic-bias constants with unsigned-byte bounds.
+ uint8_t kernel_zero_point[4];
+ float scale;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } fp32_neon;
+ struct { // Kernel zero point as four bytes, scale, and a 16-bit output zero point; no magic-bias fields.
+ uint8_t kernel_zero_point[4];
+ float scale;
+ int16_t output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } fp32_neonv8;
+ struct { // Integer-only layout: pre-shift, multiplier and post-shift take the place of a float scale.
+ uint8_t kernel_zero_point[4];
+ int32_t right_pre_shift;
+ int32_t multiplier;
+ int32_t right_post_shift;
+ int16_t output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } rndnu_neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Kernel zero point widened to 16-bit elements; upper bound is a float relative to the zero point, with no separate output_max field.
+ XNN_ALIGN(16) int16_t kernel_zero_point[8];
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) uint8_t output_min[16];
+ } fp32_sse2;
+ struct { // Same fields as fp32_sse2 with doubled element counts and XNN_ALIGN(32).
+ XNN_ALIGN(32) int16_t kernel_zero_point[16];
+ XNN_ALIGN(32) float scale[8];
+ XNN_ALIGN(32) float output_max_less_zero_point[8];
+ XNN_ALIGN(32) int16_t output_zero_point[16];
+ XNN_ALIGN(32) uint8_t output_min[32];
+ } fp32_avx2;
+ struct { // Same fields as fp32_avx2 with doubled element counts and XNN_ALIGN(64).
+ XNN_ALIGN(64) int16_t kernel_zero_point[32];
+ XNN_ALIGN(64) float scale[16];
+ XNN_ALIGN(64) float output_max_less_zero_point[16];
+ XNN_ALIGN(64) int16_t output_zero_point[32];
+ XNN_ALIGN(64) uint8_t output_min[64];
+ } fp32_avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Lower bound is stored as magic_min; there is no magic_max or output_min field.
+ XNN_ALIGN(8) int16_t kernel_zero_point[4];
+ XNN_ALIGN(8) float scale[2];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) int32_t magic_min[2];
+ XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+ XNN_ALIGN(8) int8_t output_max[8]; // NOTE(review): int8_t here, while the other QU8 layouts in this union use uint8_t bounds — confirm this is intended.
+ } fp32_wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// Add/Sub Min+Max: used by quantized VADD[C] microkernels with MINMAX activation.
+
+union xnn_qs8_addsub_minmax_params { // Parameters for signed quantized VADD[C] microkernels with MINMAX activation: per-operand multipliers, a shift, and output bounds.
+ struct { // Always available: 32-bit bias and multipliers, with bounds expressed relative to the output zero point.
+ int32_t bias;
+ int32_t a_multiplier;
+ int32_t b_multiplier;
+ uint32_t shift;
+ int32_t output_min_less_zero_point;
+ int32_t output_max_less_zero_point;
+ int32_t output_zero_point;
+ } scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Stores the input zero points directly instead of a precomputed bias; the shift field is named right_shift.
+ int8_t a_zero_point;
+ int8_t b_zero_point;
+ int16_t output_zero_point;
+ int32_t a_multiplier;
+ int32_t b_multiplier;
+ int32_t right_shift;
+ int8_t output_min;
+ int8_t output_max;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Multipliers are split into 16-bit lo/hi halves; output bounds are 16-bit elements.
+ XNN_ALIGN(16) int32_t bias[4];
+ XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
+ XNN_ALIGN(16) uint16_t a_multiplier_hi[8];
+ XNN_ALIGN(16) uint16_t b_multiplier_lo[8];
+ XNN_ALIGN(16) uint16_t b_multiplier_hi[8];
+ uint32_t shift;
+ uint32_t b_multiplier; // Unsplit copy of the B multiplier kept alongside the lo/hi halves.
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int16_t output_min[8];
+ XNN_ALIGN(16) int16_t output_max[8];
+ } sse2;
+ struct { // Same fields as sse2 except the output bounds are sixteen signed bytes each.
+ XNN_ALIGN(16) int32_t bias[4];
+ XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
+ XNN_ALIGN(16) uint16_t a_multiplier_hi[8];
+ XNN_ALIGN(16) uint16_t b_multiplier_lo[8];
+ XNN_ALIGN(16) uint16_t b_multiplier_hi[8];
+ uint32_t shift;
+ uint32_t b_multiplier;
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int8_t output_min[16];
+ XNN_ALIGN(16) int8_t output_max[16];
+ } sse4_mul16;
+ struct { // Full 32-bit multipliers (no lo/hi split); the shift is stored as two 64-bit elements.
+ XNN_ALIGN(16) int32_t bias[4];
+ XNN_ALIGN(16) int32_t a_multiplier[4];
+ XNN_ALIGN(16) int32_t b_multiplier[4];
+ XNN_ALIGN(16) uint64_t shift[2];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int8_t output_min[16];
+ XNN_ALIGN(16) int8_t output_max[16];
+ } sse4_mul32;
+ struct { // Same fields as sse4_mul32 with doubled counts and XNN_ALIGN(32), except the output bounds stay at 16 bytes / XNN_ALIGN(16).
+ XNN_ALIGN(32) int32_t bias[8];
+ XNN_ALIGN(32) int32_t a_multiplier[8];
+ XNN_ALIGN(32) int32_t b_multiplier[8];
+ XNN_ALIGN(32) uint64_t shift[4];
+ XNN_ALIGN(32) int16_t output_zero_point[16];
+ XNN_ALIGN(16) int8_t output_min[16];
+ XNN_ALIGN(16) int8_t output_max[16];
+ } avx2;
+ struct { // Same fields as avx2 with doubled counts and XNN_ALIGN(64), except the output bounds are 32 bytes / XNN_ALIGN(32).
+ XNN_ALIGN(64) int32_t bias[16];
+ XNN_ALIGN(64) int32_t a_multiplier[16];
+ XNN_ALIGN(64) int32_t b_multiplier[16];
+ XNN_ALIGN(64) uint64_t shift[8];
+ XNN_ALIGN(64) int16_t output_zero_point[32];
+ XNN_ALIGN(32) int8_t output_min[32];
+ XNN_ALIGN(32) int8_t output_max[32];
+ } avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Two-element 32-bit bias and multipliers with a single scalar shift.
+ XNN_ALIGN(8) int32_t bias[2];
+ XNN_ALIGN(8) int32_t a_multiplier[2];
+ XNN_ALIGN(8) int32_t b_multiplier[2];
+ uint32_t shift;
+ XNN_ALIGN(8) int16_t output_zero_point[4];
+ XNN_ALIGN(8) int8_t output_min[8];
+ XNN_ALIGN(8) int8_t output_max[8];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_qu8_addsub_minmax_params { // Parameters for unsigned quantized VADD[C] microkernels with MINMAX activation: per-operand multipliers, a shift, and output bounds.
+ struct { // Always available: 32-bit bias and multipliers, an explicit rounding term, and bounds relative to the output zero point.
+ int32_t bias;
+ int32_t a_multiplier;
+ int32_t b_multiplier;
+ int32_t rounding;
+ uint32_t shift;
+ int32_t output_min_less_zero_point;
+ int32_t output_max_less_zero_point;
+ int32_t output_zero_point;
+ } scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Stores the input zero points directly instead of a precomputed bias; the shift field is named right_shift.
+ uint8_t a_zero_point;
+ uint8_t b_zero_point;
+ int16_t output_zero_point;
+ int32_t a_multiplier;
+ int32_t b_multiplier;
+ int32_t right_shift;
+ uint8_t output_min;
+ uint8_t output_max;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Multipliers are split into 16-bit lo/hi halves; output bounds are sixteen unsigned bytes each.
+ XNN_ALIGN(16) int32_t bias[4];
+ XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
+ XNN_ALIGN(16) uint16_t a_multiplier_hi[8];
+ XNN_ALIGN(16) uint16_t b_multiplier_lo[8];
+ XNN_ALIGN(16) uint16_t b_multiplier_hi[8];
+ uint32_t shift;
+ uint32_t b_multiplier; // Unsplit copy of the B multiplier kept alongside the lo/hi halves.
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) uint8_t output_min[16];
+ XNN_ALIGN(16) uint8_t output_max[16];
+ } sse2;
+ struct { // Full 32-bit multipliers (no lo/hi split); the shift is stored as two 64-bit elements.
+ XNN_ALIGN(16) int32_t bias[4];
+ XNN_ALIGN(16) int32_t a_multiplier[4];
+ XNN_ALIGN(16) int32_t b_multiplier[4];
+ XNN_ALIGN(16) uint64_t shift[2];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) uint8_t output_min[16];
+ XNN_ALIGN(16) uint8_t output_max[16];
+ } sse4;
+ struct { // Same fields as sse4 with doubled counts and XNN_ALIGN(32), except the output bounds stay at 16 bytes / XNN_ALIGN(16).
+ XNN_ALIGN(32) int32_t bias[8];
+ XNN_ALIGN(32) int32_t a_multiplier[8];
+ XNN_ALIGN(32) int32_t b_multiplier[8];
+ XNN_ALIGN(32) uint64_t shift[4];
+ XNN_ALIGN(32) int16_t output_zero_point[16];
+ XNN_ALIGN(16) uint8_t output_min[16];
+ XNN_ALIGN(16) uint8_t output_max[16];
+ } avx2;
+ struct { // Same fields as avx2 with doubled counts and XNN_ALIGN(64), except the output bounds are 32 bytes / XNN_ALIGN(32).
+ XNN_ALIGN(64) int32_t bias[16];
+ XNN_ALIGN(64) int32_t a_multiplier[16];
+ XNN_ALIGN(64) int32_t b_multiplier[16];
+ XNN_ALIGN(64) uint64_t shift[8];
+ XNN_ALIGN(64) int16_t output_zero_point[32];
+ XNN_ALIGN(32) uint8_t output_min[32];
+ XNN_ALIGN(32) uint8_t output_max[32];
+ } avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Two-element 32-bit bias and multipliers with a single scalar shift.
+ XNN_ALIGN(8) int32_t bias[2];
+ XNN_ALIGN(8) int32_t a_multiplier[2];
+ XNN_ALIGN(8) int32_t b_multiplier[2];
+ uint32_t shift;
+ XNN_ALIGN(8) int16_t output_zero_point[4];
+ XNN_ALIGN(8) uint8_t output_min[8];
+ XNN_ALIGN(8) uint8_t output_max[8];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// Mul Min+Max: used by quantized VMUL[C] microkernels with MINMAX activation.
+
+union xnn_qs8_mul_minmax_params { // Parameters for signed quantized VMUL[C] microkernels with MINMAX activation: both input zero points, a scale (or integer equivalent), and output bounds.
+ struct { // Always available: 32-bit zero points, float scale, float bounds relative to the output zero point, and magic-bias constants.
+ int32_t a_zero_point;
+ int32_t b_zero_point;
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ } fp32_scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Zero points stored as two-byte arrays; float scale with magic-bias constants.
+ int8_t a_zero_point[2];
+ int8_t b_zero_point[2];
+ float scale;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } fp32_neon;
+ struct { // Float scale with a 16-bit output zero point; no magic-bias fields.
+ int8_t a_zero_point[2];
+ int8_t b_zero_point[2];
+ float scale;
+ int16_t output_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } fp32_neonv8;
+ struct { // Integer-only layout: left_pre_shift, multiplier and left_post_shift take the place of a float scale.
+ int8_t a_zero_point[2];
+ int8_t b_zero_point[2];
+ int32_t left_pre_shift;
+ int32_t multiplier;
+ int32_t left_post_shift;
+ int16_t output_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } rndnu_neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Zero points widened to 16-bit elements; output bounds are also 16-bit.
+ XNN_ALIGN(16) int16_t a_zero_point[8];
+ XNN_ALIGN(16) int16_t b_zero_point[8];
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int16_t output_min[8];
+ XNN_ALIGN(16) int16_t output_max[8];
+ } fp32_sse2;
+ struct { // Same fields as fp32_sse2 except the output bounds are sixteen signed bytes each.
+ XNN_ALIGN(16) int16_t a_zero_point[8];
+ XNN_ALIGN(16) int16_t b_zero_point[8];
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int8_t output_min[16];
+ XNN_ALIGN(16) int8_t output_max[16];
+ } fp32_sse4;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Lower bound is stored as magic_min and the upper bound as output_max bytes; there is no magic_max or output_min field.
+ XNN_ALIGN(8) int16_t a_zero_point[4];
+ XNN_ALIGN(8) int16_t b_zero_point[4];
+ XNN_ALIGN(8) float scale[2];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) int32_t magic_min[2];
+ XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+ XNN_ALIGN(8) int8_t output_max[8];
+ } fp32_wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_qu8_mul_minmax_params { // Parameters for unsigned quantized VMUL[C] microkernels with MINMAX activation: both input zero points, a scale (or integer equivalent), and output bounds.
+ struct { // Always available: 32-bit zero points, float scale, float bounds relative to the output zero point, and magic-bias constants.
+ int32_t a_zero_point;
+ int32_t b_zero_point;
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ } fp32_scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Zero points stored as two-byte arrays; float scale with magic-bias constants.
+ uint8_t a_zero_point[2];
+ uint8_t b_zero_point[2];
+ float scale;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } fp32_neon;
+ struct { // Float scale with a 16-bit output zero point; no magic-bias fields.
+ uint8_t a_zero_point[2];
+ uint8_t b_zero_point[2];
+ float scale;
+ int16_t output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } fp32_neonv8;
+ struct { // Integer-only layout: left_pre_shift, multiplier and left_post_shift take the place of a float scale.
+ uint8_t a_zero_point[2];
+ uint8_t b_zero_point[2];
+ int32_t left_pre_shift;
+ int32_t multiplier;
+ int32_t left_post_shift;
+ int16_t output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } rndnu_neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Zero points widened to 16-bit elements; output bounds are sixteen unsigned bytes each.
+ XNN_ALIGN(16) int16_t a_zero_point[8];
+ XNN_ALIGN(16) int16_t b_zero_point[8];
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) uint8_t output_min[16];
+ XNN_ALIGN(16) uint8_t output_max[16];
+ } fp32_sse2;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Lower bound is stored as magic_min and the upper bound as output_max bytes; there is no magic_max or output_min field.
+ XNN_ALIGN(8) int16_t a_zero_point[4];
+ XNN_ALIGN(8) int16_t b_zero_point[4];
+ XNN_ALIGN(8) float scale[2];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) int32_t magic_min[2];
+ XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+ XNN_ALIGN(8) uint8_t output_max[8];
+ } fp32_wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// AvgPool Min+Max: used by quantized GAVGPOOL microkernels with MINMAX activation.
+
+union xnn_qs8_avgpool_minmax_params { // Parameters for signed quantized GAVGPOOL microkernels with MINMAX activation; every variant begins with an init_bias field.
+ struct { // Initial bias, scale, float bounds relative to the zero point, and magic-bias constants.
+ int32_t init_bias;
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ } fp32_scalar_fmagic;
+ struct { // Initial bias, scale, magic bias, and integer magic_min/magic_max bounds.
+ int32_t init_bias;
+ float scale;
+ float magic_bias;
+ int32_t magic_min;
+ int32_t magic_max;
+ int32_t magic_bias_less_zero_point;
+ } fp32_scalar_imagic;
+ struct { // Initial bias, scale, and float bounds; the zero point is stored separately, with no magic-bias fields.
+ int32_t init_bias;
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ int32_t output_zero_point;
+ } fp32_scalar_lrintf;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Float scale and magic-bias constants with signed-byte output bounds.
+ int32_t init_bias;
+ float scale;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } fp32_neon;
+ struct { // Float scale with a 16-bit output zero point; no magic-bias fields.
+ int32_t init_bias;
+ float scale;
+ int16_t output_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } fp32_neonv8;
+ struct { // Integer-only layout: left_pre_shift, multiplier and left_post_shift take the place of a float scale.
+ int32_t init_bias;
+ int32_t left_pre_shift;
+ int32_t multiplier;
+ int32_t left_post_shift;
+ int16_t output_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } rndnu_neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Upper bound is a float relative to the zero point; lower bound is 16-bit. There is no separate output_max field.
+ XNN_ALIGN(16) int32_t init_bias[4];
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int16_t output_min[8];
+ } fp32_sse2;
+ struct { // Same fields as fp32_sse2 except the lower bound is sixteen signed bytes.
+ XNN_ALIGN(16) int32_t init_bias[4];
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int8_t output_min[16];
+ } fp32_sse4;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Lower bound is stored as magic_min and the upper bound as output_max bytes; there is no magic_max or output_min field.
+ XNN_ALIGN(8) int32_t init_bias[2];
+ XNN_ALIGN(8) float scale[2];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) int32_t magic_min[2];
+ XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+ XNN_ALIGN(8) int8_t output_max[8];
+ } fp32_wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_qu8_avgpool_minmax_params { // Parameters for unsigned quantized GAVGPOOL microkernels with MINMAX activation, plus legacy layouts (scalar/neon/sse2) for QU8 AVGPOOL.
+ struct { // Initial bias, scale, float bounds relative to the zero point, and magic-bias constants.
+ int32_t init_bias;
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ } fp32_scalar_fmagic;
+ struct { // Initial bias, scale, magic bias, and integer magic_min/magic_max bounds.
+ int32_t init_bias;
+ float scale;
+ float magic_bias;
+ int32_t magic_min;
+ int32_t magic_max;
+ int32_t magic_bias_less_zero_point;
+ } fp32_scalar_imagic;
+ struct { // Initial bias, scale, and float bounds; the zero point is stored separately, with no magic-bias fields.
+ int32_t init_bias;
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ int32_t output_zero_point;
+ } fp32_scalar_lrintf;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Float scale and magic-bias constants with unsigned-byte output bounds.
+ int32_t init_bias;
+ float scale;
+ float magic_bias;
+ int32_t magic_bias_less_output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } fp32_neon;
+ struct { // Float scale with a 16-bit output zero point; no magic-bias fields.
+ int32_t init_bias;
+ float scale;
+ int16_t output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } fp32_neonv8;
+ struct { // Integer-only layout: left_pre_shift, multiplier and left_post_shift take the place of a float scale.
+ int32_t init_bias;
+ int32_t left_pre_shift;
+ int32_t multiplier;
+ int32_t left_post_shift;
+ int16_t output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } rndnu_neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Upper bound is a float relative to the zero point; lower bound is sixteen unsigned bytes. There is no separate output_max field.
+ XNN_ALIGN(16) int32_t init_bias[4];
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) uint8_t output_min[16];
+ } fp32_sse2;
+ struct { // Field-for-field identical to fp32_sse2 in this union.
+ XNN_ALIGN(16) int32_t init_bias[4];
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) uint8_t output_min[16];
+ } fp32_sse4;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Lower bound is stored as magic_min and the upper bound as output_max bytes; there is no magic_max or output_min field.
+ XNN_ALIGN(8) int32_t init_bias[2];
+ XNN_ALIGN(8) float scale[2];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) int32_t magic_min[2];
+ XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+ XNN_ALIGN(8) uint8_t output_max[8];
+ } fp32_wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+
+ // Legacy parameters used by QU8 AVGPOOL microkernels
+ struct { // Integer-only: multiplier, 64-bit rounding term and right shift; bounds are relative to the output zero point.
+ int32_t bias;
+ int32_t multiplier;
+ int64_t rounding;
+ uint32_t right_shift;
+ int32_t output_min_less_zero_point;
+ int32_t output_max_less_zero_point;
+ int32_t output_zero_point;
+ } scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Shift is a 64-bit left_shift here, whereas the scalar layout uses right_shift. NOTE(review): presumably holds a negated shift amount — confirm.
+ int32_t bias;
+ int32_t multiplier;
+ int64_t left_shift;
+ int16_t output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Rounding and right shift are stored as two 64-bit elements each; both output bounds are present as byte arrays.
+ XNN_ALIGN(16) int32_t bias[4];
+ XNN_ALIGN(16) uint32_t multiplier[4];
+ XNN_ALIGN(16) uint64_t rounding[2];
+ XNN_ALIGN(16) uint64_t right_shift[2];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) uint8_t output_min[16];
+ XNN_ALIGN(16) uint8_t output_max[16];
+ } sse2;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+
+// Abs: used by VABS microkernels.
+
+union xnn_f16_abs_params { // Parameters for F16 VABS microkernels; only the x86 build adds real data.
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Eight 16-bit mask elements. NOTE(review): presumably a mask with every bit except the sign bit set — confirm.
+ XNN_ALIGN(16) uint16_t nonsign_mask[8];
+ } sse;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+union xnn_f32_abs_params { // Parameters for F32 VABS microkernels; every target-specific layout stores a nonsign_mask.
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Mask held as four floats, tagged XNN_ALIGN(16).
+ XNN_ALIGN(16) float nonsign_mask[4];
+ } sse;
+ struct { // Mask held as eight floats, followed by an untagged mask table.
+ XNN_ALIGN(32) float nonsign_mask[8];
+ int32_t mask_table[14]; // NOTE(review): presumably lane masks for remainder elements — confirm against the init code.
+ } avx;
+ struct { // A single 32-bit integer mask rather than a vector-sized array.
+ uint32_t nonsign_mask;
+ } avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // Mask held as two floats, tagged XNN_ALIGN(8).
+ XNN_ALIGN(8) float nonsign_mask[2];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// Cvt (Convert): used by VCVT microkernels.
+
+// Parameters for the F16 -> F32 convert microkernels, overlaid per target
+// architecture. The scalar variant is always available; neon, sse_int16 /
+// sse_int32 and wasmsimd_int16 / wasmsimd_int32 exist only on their
+// respective targets. The *_int16 and *_int32 variants differ in the element
+// width of their integer constants (16-bit vs. 32-bit lanes).
+union xnn_f16_f32_cvt_params {
+ struct {
+ uint32_t sign_mask;
+ uint32_t exp_offset;
+ float exp_scale;
+ uint32_t magic_mask;
+ float magic_bias;
+ uint32_t denorm_cutoff;
+ } scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ float exp_scale;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ // Each field below is one 16-byte-aligned, 128-bit block.
+ struct {
+ XNN_ALIGN(16) uint16_t sign_mask[8];
+ XNN_ALIGN(16) uint16_t exp_offset[8];
+ XNN_ALIGN(16) float exp_scale[4];
+ XNN_ALIGN(16) uint16_t magic_mask[8];
+ XNN_ALIGN(16) float magic_bias[4];
+ XNN_ALIGN(16) int16_t denorm_cutoff[8];
+ } sse_int16;
+ // Unlike sse_int16, this variant has no magic_mask field and stores
+ // magic_bias as uint32_t rather than float.
+ struct {
+ XNN_ALIGN(16) uint32_t sign_mask[4];
+ XNN_ALIGN(16) uint32_t exp_offset[4];
+ XNN_ALIGN(16) float exp_scale[4];
+ XNN_ALIGN(16) uint32_t magic_bias[4];
+ XNN_ALIGN(16) int32_t denorm_cutoff[4];
+ } sse_int32;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ // Each field below is one 8-byte-aligned, 64-bit block.
+ struct {
+ XNN_ALIGN(8) uint16_t sign_mask[4];
+ XNN_ALIGN(8) uint16_t exp_offset[4];
+ XNN_ALIGN(8) float exp_scale[2];
+ XNN_ALIGN(8) uint16_t magic_mask[4];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) int16_t denorm_cutoff[4];
+ } wasmsimd_int16;
+ struct {
+ XNN_ALIGN(8) uint32_t sign_mask[2];
+ XNN_ALIGN(8) uint32_t exp_offset[2];
+ XNN_ALIGN(8) float exp_scale[2];
+ XNN_ALIGN(8) uint32_t magic_bias[2];
+ XNN_ALIGN(8) int32_t denorm_cutoff[2];
+ } wasmsimd_int32;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+// Parameters for the F32 -> F16 convert microkernels, overlaid per target
+// architecture. Two scalar variants (scalar_bitcast, scalar_fabsf) are always
+// available; neon, sse2 / f16c and wasmsimd exist only on their respective
+// targets.
+union xnn_f32_f16_cvt_params {
+ struct {
+ uint32_t nonsign_mask;
+ uint32_t exp_bias;
+ float scale_to_inf;
+ uint32_t expw_max;
+ float scale_to_zero;
+ uint32_t bias_min;
+ uint16_t exph_mask;
+ uint16_t manth_mask;
+ uint16_t nanh;
+ } scalar_bitcast;
+ // Same constants as scalar_bitcast minus nonsign_mask, in a different order.
+ struct {
+ float scale_to_inf;
+ uint32_t exp_bias;
+ float scale_to_zero;
+ uint32_t expw_max;
+ uint32_t bias_min;
+ uint16_t exph_mask;
+ uint16_t manth_mask;
+ uint16_t nanh;
+ } scalar_fabsf;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ uint32_t exp_bias;
+ float scale_to_inf;
+ uint32_t expw_max;
+ float scale_to_zero;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ // Each field below is one 16-byte-aligned, 128-bit block. Note that
+ // bias_min and nanh use 16-bit lanes while the other fields use 32-bit lanes.
+ struct {
+ XNN_ALIGN(16) uint32_t nonsign_mask[4];
+ XNN_ALIGN(16) uint32_t exp_bias[4];
+ XNN_ALIGN(16) float scale_to_inf[4];
+ XNN_ALIGN(16) uint32_t expw_max[4];
+ XNN_ALIGN(16) float scale_to_zero[4];
+ XNN_ALIGN(16) int16_t bias_min[8];
+ XNN_ALIGN(16) uint32_t manth_mask[4];
+ XNN_ALIGN(16) uint32_t exph_mask[4];
+ XNN_ALIGN(16) uint16_t nanh[8];
+ } sse2;
+ struct {
+ // NOTE(review): presumably used to build lane masks for partial
+ // (remainder) vectors - confirm in kernels.
+ int32_t mask_table[14];
+ } f16c;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ // Each field below is one 8-byte-aligned, 64-bit block.
+ struct {
+ XNN_ALIGN(8) uint32_t exp_bias[2];
+ XNN_ALIGN(8) float scale_to_inf[2];
+ XNN_ALIGN(8) uint32_t expw_max[2];
+ XNN_ALIGN(8) float scale_to_zero[2];
+ XNN_ALIGN(8) int16_t bias_min[4];
+ XNN_ALIGN(8) uint32_t manth_mask[2];
+ XNN_ALIGN(8) uint32_t exph_mask[2];
+ XNN_ALIGN(8) uint16_t nanh[4];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+// Parameters for the F32 -> QS8 convert microkernels, overlaid per target
+// architecture. Three scalar variants (scalar_fmagic, scalar_imagic,
+// scalar_lrintf) are always available; neon / neonv8, sse2 / sse4 / avx /
+// avx2 / avx512 and wasmsimd_cvt / wasmsimd_magic exist only on their
+// respective targets. Output clamp bounds are signed 8-bit except in the sse2
+// variant, which stores output_min as 16-bit lanes.
+union xnn_f32_qs8_cvt_params {
+ struct {
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ float magic_bias;
+ int32_t magic_bias_less_zero_point;
+ } scalar_fmagic;
+ struct {
+ float scale;
+ float magic_bias;
+ int32_t magic_min;
+ int32_t magic_max;
+ int32_t magic_bias_less_zero_point;
+ } scalar_imagic;
+ struct {
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ int32_t output_zero_point;
+ } scalar_lrintf;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ float scale;
+ float magic_bias;
+ int32_t magic_bias_less_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } neon;
+ struct {
+ float scale;
+ int16_t output_zero_point;
+ int8_t output_min;
+ int8_t output_max;
+ } neonv8;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ // 16-bit lanes here, unlike the 8-bit output_min of the other x86 variants.
+ XNN_ALIGN(16) int16_t output_min[8];
+ } sse2;
+ struct {
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int8_t output_min[16];
+ } sse4;
+ struct {
+ // 256-bit float constants, but 128-bit integer constants.
+ XNN_ALIGN(32) float scale[8];
+ XNN_ALIGN(32) float output_max_less_zero_point[8];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) int8_t output_min[16];
+ // NOTE(review): presumably used to build lane masks for partial
+ // (remainder) vectors - confirm in kernels.
+ int32_t mask_table[14];
+ } avx;
+ struct {
+ XNN_ALIGN(32) float scale[8];
+ XNN_ALIGN(32) float output_max_less_zero_point[8];
+ XNN_ALIGN(32) int16_t output_zero_point[16];
+ XNN_ALIGN(32) uint32_t shuffle_mask[8];
+ XNN_ALIGN(32) int8_t output_min[32];
+ int32_t mask_table[14];
+ } avx2;
+ struct {
+ XNN_ALIGN(64) float scale[16];
+ XNN_ALIGN(64) float output_max_less_zero_point[16];
+ XNN_ALIGN(64) int16_t output_zero_point[32];
+ XNN_ALIGN(64) int8_t output_min[64];
+ XNN_ALIGN(64) uint32_t shuffle512_mask[16];
+ XNN_ALIGN(32) uint32_t shuffle256_mask[8];
+ } avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct {
+ XNN_ALIGN(8) float scale[2];
+ XNN_ALIGN(8) int16_t output_zero_point[4];
+ XNN_ALIGN(8) int8_t output_min[8];
+ XNN_ALIGN(8) int8_t output_max[8];
+ } wasmsimd_cvt;
+ struct {
+ XNN_ALIGN(8) float scale[2];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) int32_t magic_min[2];
+ XNN_ALIGN(8) int32_t magic_bias_less_zero_point[2];
+ XNN_ALIGN(8) int8_t output_max[8];
+ } wasmsimd_magic;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+// Parameters for the F32 -> QU8 convert microkernels, overlaid per target
+// architecture. Mirrors xnn_f32_qs8_cvt_params with unsigned 8-bit clamp
+// bounds, except that there is no sse4 variant here. The three scalar
+// variants are always available; the others exist only on their respective
+// targets.
+union xnn_f32_qu8_cvt_params {
+ struct {
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ float magic_bias;
+ int32_t magic_bias_less_zero_point;
+ } scalar_fmagic;
+ struct {
+ float scale;
+ float magic_bias;
+ int32_t magic_min;
+ int32_t magic_max;
+ int32_t magic_bias_less_zero_point;
+ } scalar_imagic;
+ struct {
+ float scale;
+ float output_min_less_zero_point;
+ float output_max_less_zero_point;
+ int32_t output_zero_point;
+ } scalar_lrintf;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ float scale;
+ float magic_bias;
+ int32_t magic_bias_less_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } neon;
+ struct {
+ float scale;
+ int16_t output_zero_point;
+ uint8_t output_min;
+ uint8_t output_max;
+ } neonv8;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ XNN_ALIGN(16) float scale[4];
+ XNN_ALIGN(16) float output_max_less_zero_point[4];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) uint8_t output_min[16];
+ } sse2;
+ struct {
+ // 256-bit float constants, but 128-bit integer constants.
+ XNN_ALIGN(32) float scale[8];
+ XNN_ALIGN(32) float output_max_less_zero_point[8];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ XNN_ALIGN(16) uint8_t output_min[16];
+ // NOTE(review): presumably used to build lane masks for partial
+ // (remainder) vectors - confirm in kernels.
+ int32_t mask_table[14];
+ } avx;
+ struct {
+ XNN_ALIGN(32) float scale[8];
+ XNN_ALIGN(32) float output_max_less_zero_point[8];
+ XNN_ALIGN(32) int16_t output_zero_point[16];
+ XNN_ALIGN(32) uint32_t shuffle_mask[8];
+ XNN_ALIGN(32) uint8_t output_min[32];
+ int32_t mask_table[14];
+ } avx2;
+ struct {
+ XNN_ALIGN(64) float scale[16];
+ XNN_ALIGN(64) float output_max_less_zero_point[16];
+ XNN_ALIGN(64) int16_t output_zero_point[32];
+ XNN_ALIGN(64) uint8_t output_min[64];
+ XNN_ALIGN(64) uint32_t shuffle512_mask[16];
+ XNN_ALIGN(32) uint32_t shuffle256_mask[8];
+ } avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct {
+ XNN_ALIGN(8) float scale[2];
+ XNN_ALIGN(8) int16_t output_zero_point[4];
+ XNN_ALIGN(8) uint8_t output_min[8];
+ XNN_ALIGN(8) uint8_t output_max[8];
+ } wasmsimd_cvt;
+ struct {
+ XNN_ALIGN(8) float scale[2];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) int32_t magic_min[2];
+ XNN_ALIGN(8) int32_t magic_bias_less_zero_point[2];
+ XNN_ALIGN(8) uint8_t output_max[8];
+ } wasmsimd_magic;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+// Parameters for the QS8 -> QS8 convert microkernels, overlaid per target
+// architecture. Two field layouts are in use: (bias, multiplier) in scalar,
+// armv6simd and sse2, and (input_zero_point, multiplier, output_zero_point)
+// in neon, ssse3, avx2 and wasmsimd. Only scalar is always available.
+union xnn_qs8_cvt_params {
+ struct {
+ int32_t bias;
+ int32_t multiplier;
+ } scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ // Stored already negated, as an unsigned 32-bit word.
+ // NOTE(review): presumably holds packed 16-bit halves for ARMv6 SIMD
+ // instructions - confirm in init code.
+ uint32_t minus_input_zero_point;
+ int32_t multiplier;
+ int32_t bias;
+ } armv6simd;
+ struct {
+ int16_t input_zero_point;
+ int16_t multiplier;
+ int16_t output_zero_point;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ XNN_ALIGN(16) int16_t multiplier[8];
+ XNN_ALIGN(16) int32_t bias[4];
+ } sse2;
+ struct {
+ XNN_ALIGN(16) int16_t input_zero_point[8];
+ XNN_ALIGN(16) int16_t multiplier[8];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ } ssse3;
+ struct {
+ XNN_ALIGN(32) int16_t input_zero_point[16];
+ XNN_ALIGN(32) int16_t multiplier[16];
+ XNN_ALIGN(32) int16_t output_zero_point[16];
+ } avx2;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct {
+ XNN_ALIGN(8) int16_t input_zero_point[4];
+ XNN_ALIGN(8) int16_t multiplier[4];
+ XNN_ALIGN(8) int16_t output_zero_point[4];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+// Parameters for the QS8 -> F32 convert microkernels, overlaid per target
+// architecture. Only scalar is always available. Apart from scalar and sse2,
+// the variants store the zero point already negated (minus_zero_point).
+union xnn_qs8_f32_cvt_params {
+ struct {
+ int32_t zero_point;
+ float scale;
+ } scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ int16_t minus_zero_point[2];
+ float scale;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ // The sse2 variant carries no zero-point field; it uses sign_mask, magic_exp
+ // and magic_bias instead. NOTE(review): the zero point is presumably folded
+ // into magic_bias by the init code - confirm.
+ struct {
+ XNN_ALIGN(16) uint8_t sign_mask[16];
+ XNN_ALIGN(16) uint16_t magic_exp[8];
+ XNN_ALIGN(16) float magic_bias[4];
+ XNN_ALIGN(16) float scale[4];
+ } sse2;
+ struct {
+ XNN_ALIGN(16) int32_t minus_zero_point[4];
+ XNN_ALIGN(16) float scale[4];
+ } sse4;
+ struct {
+ XNN_ALIGN(32) int32_t minus_zero_point[8];
+ XNN_ALIGN(32) float scale[8];
+ } avx;
+ struct {
+ XNN_ALIGN(64) int32_t minus_zero_point[16];
+ XNN_ALIGN(64) float scale[16];
+ } avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct {
+ XNN_ALIGN(8) int16_t minus_zero_point[4];
+ XNN_ALIGN(8) float scale[2];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+// Parameters for the QU8 -> QU8 convert microkernels, overlaid per target
+// architecture. Same variant set as xnn_qs8_cvt_params; here the
+// input_zero_point fields and the sse2 multiplier are unsigned 16-bit.
+// Only scalar is always available.
+union xnn_qu8_cvt_params {
+ struct {
+ int32_t bias;
+ int32_t multiplier;
+ } scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ // Stored already negated, as an unsigned 32-bit word.
+ // NOTE(review): presumably holds packed 16-bit halves for ARMv6 SIMD
+ // instructions - confirm in init code.
+ uint32_t minus_input_zero_point;
+ int32_t multiplier;
+ int32_t bias;
+ } armv6simd;
+ struct {
+ uint16_t input_zero_point;
+ int16_t multiplier;
+ int16_t output_zero_point;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ XNN_ALIGN(16) uint16_t multiplier[8];
+ XNN_ALIGN(16) int32_t bias[4];
+ } sse2;
+ struct {
+ XNN_ALIGN(16) uint16_t input_zero_point[8];
+ XNN_ALIGN(16) int16_t multiplier[8];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ } ssse3;
+ struct {
+ XNN_ALIGN(32) uint16_t input_zero_point[16];
+ XNN_ALIGN(32) int16_t multiplier[16];
+ XNN_ALIGN(32) int16_t output_zero_point[16];
+ } avx2;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct {
+ XNN_ALIGN(8) uint16_t input_zero_point[4];
+ XNN_ALIGN(8) int16_t multiplier[4];
+ XNN_ALIGN(8) int16_t output_zero_point[4];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+// Parameters for the QU8 -> F32 convert microkernels, overlaid per target
+// architecture. Same variant set as xnn_qs8_f32_cvt_params, except that the
+// sse2 variant here has no sign_mask field. Only scalar is always available.
+union xnn_qu8_f32_cvt_params {
+ struct {
+ int32_t zero_point;
+ float scale;
+ } scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ int16_t minus_zero_point[2];
+ float scale;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ // The sse2 variant carries no zero-point field. NOTE(review): the zero point
+ // is presumably folded into magic_bias by the init code - confirm.
+ struct {
+ XNN_ALIGN(16) uint16_t magic_exp[8];
+ XNN_ALIGN(16) float magic_bias[4];
+ XNN_ALIGN(16) float scale[4];
+ } sse2;
+ struct {
+ XNN_ALIGN(16) int32_t minus_zero_point[4];
+ XNN_ALIGN(16) float scale[4];
+ } sse4;
+ struct {
+ XNN_ALIGN(32) int32_t minus_zero_point[8];
+ XNN_ALIGN(32) float scale[8];
+ } avx;
+ struct {
+ XNN_ALIGN(64) int32_t minus_zero_point[16];
+ XNN_ALIGN(64) float scale[16];
+ } avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct {
+ XNN_ALIGN(8) int16_t minus_zero_point[4];
+ XNN_ALIGN(8) float scale[2];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// ELU: used by VELU microkernels.
+
+// Parameters for the F16 ELU microkernels, overlaid per target architecture.
+// There is no scalar variant; the dummy member keeps the union non-empty on
+// targets that are neither ARM nor x86.
+union xnn_f16_elu_params {
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ // Constants are stored as raw 16-bit words (presumably IEEE half-precision
+ // bit patterns - confirm in init code). This variant stores minus_alpha and
+ // has no c1 field, unlike the x86 variant below.
+ struct {
+ uint16_t prescale;
+ uint16_t sat_cutoff;
+ uint16_t magic_bias;
+ uint16_t log2e;
+ uint16_t minus_ln2;
+ uint16_t c3;
+ uint16_t c2;
+ uint16_t minus_alpha;
+ uint16_t beta;
+ } neonfp16arith_rr1_p3;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ // Constants are stored as single-precision floats, eight per 32-byte-aligned
+ // block, even though the kernel operates on F16 data.
+ struct {
+ XNN_ALIGN(32) float prescale[8];
+ XNN_ALIGN(32) float sat_cutoff[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) float minus_ln2[8];
+ XNN_ALIGN(32) float c3[8];
+ XNN_ALIGN(32) float c2[8];
+ XNN_ALIGN(32) float c1[8];
+ XNN_ALIGN(32) float alpha[8];
+ XNN_ALIGN(32) float beta[8];
+ } avx2_rr1_p3;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+// Parameters for the F32 ELU microkernels, overlaid per target architecture
+// and per algorithm variant. The two scalar variants are always available;
+// the NEON, SSE2/AVX/AVX2/AVX512 and WAsm SIMD variants exist only on their
+// respective targets.
+//
+// NOTE(review): variant names appear to encode the algorithm - "rr1"/"rr2"
+// variants carry one (minus_ln2) or two (minus_ln2_hi/minus_ln2_lo) reduction
+// constants, and "pN" variants carry polynomial coefficients up to cN. The
+// "lutK" part presumably names a K-entry lookup table - confirm in kernels.
+union xnn_f32_elu_params {
+ struct {
+ float prescale;
+ float alpha;
+ float beta;
+ float sat_cutoff;
+ float magic_bias;
+ float log2e;
+ float minus_ln2_hi;
+ float minus_ln2_lo;
+ float c3;
+ float c2;
+ float one;
+ } scalar_rr2_lut16_p3;
+ struct {
+ float prescale;
+ float alpha;
+ float beta;
+ float sat_cutoff;
+ float magic_bias;
+ float log2e;
+ float minus_ln2_hi;
+ float minus_ln2_lo;
+ float c6;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ float one;
+ } scalar_rr2_p6;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ // NEON variants hold one scalar per constant and, unlike the scalar
+ // variants above, have no "one" field.
+ struct {
+ float prescale;
+ float alpha;
+ float beta;
+ float sat_cutoff;
+ float magic_bias;
+ float log2e;
+ float minus_ln2_hi;
+ float minus_ln2_lo;
+ float c6;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ } neon_rr2_p6;
+ struct {
+ float prescale;
+ float alpha;
+ float beta;
+ float sat_cutoff;
+ float magic_bias;
+ float log2e;
+ float minus_ln2_hi;
+ float minus_ln2_lo;
+ float c3;
+ float c2;
+ } neon_rr2_lut16_p3;
+ struct {
+ float prescale;
+ float alpha;
+ float beta;
+ float sat_cutoff;
+ float magic_bias;
+ float log2e;
+ float minus_ln2;
+ float c6;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ } neonfma_rr1_p6;
+ struct {
+ float prescale;
+ float alpha;
+ float beta;
+ float sat_cutoff;
+ float magic_bias;
+ float log2e;
+ float minus_ln2;
+ float c3;
+ float c2;
+ } neonfma_rr1_lut16_p3;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ // SSE2 variants: every constant is one 16-byte-aligned block of four lanes.
+ struct {
+ XNN_ALIGN(16) float prescale[4];
+ XNN_ALIGN(16) float alpha[4];
+ XNN_ALIGN(16) float beta[4];
+ XNN_ALIGN(16) float sat_cutoff[4];
+ XNN_ALIGN(16) float magic_bias[4];
+ XNN_ALIGN(16) float log2e[4];
+ XNN_ALIGN(16) uint32_t index_mask[4];
+ XNN_ALIGN(16) float minus_ln2_hi[4];
+ XNN_ALIGN(16) float minus_ln2_lo[4];
+ XNN_ALIGN(16) float c3[4];
+ XNN_ALIGN(16) float c2[4];
+ XNN_ALIGN(16) float one[4];
+ } sse2_rr2_lut16_p3;
+ struct {
+ XNN_ALIGN(16) float prescale[4];
+ XNN_ALIGN(16) float alpha[4];
+ XNN_ALIGN(16) float beta[4];
+ XNN_ALIGN(16) float sat_cutoff[4];
+ XNN_ALIGN(16) float magic_bias[4];
+ XNN_ALIGN(16) float log2e[4];
+ XNN_ALIGN(16) float minus_ln2_hi[4];
+ XNN_ALIGN(16) float minus_ln2_lo[4];
+ XNN_ALIGN(16) float c6[4];
+ XNN_ALIGN(16) float c5[4];
+ XNN_ALIGN(16) float c4[4];
+ XNN_ALIGN(16) float c3[4];
+ XNN_ALIGN(16) float c2[4];
+ XNN_ALIGN(16) float one[4];
+ } sse2_rr2_p6;
+ // AVX and AVX2 variants: every constant is one 32-byte-aligned block of
+ // eight lanes, followed by an unaligned 14-entry mask_table.
+ // NOTE(review): mask_table is presumably used to build lane masks for
+ // partial (remainder) vectors - confirm in kernels.
+ struct {
+ XNN_ALIGN(32) float prescale[8];
+ XNN_ALIGN(32) float alpha[8];
+ XNN_ALIGN(32) float beta[8];
+ XNN_ALIGN(32) float sat_cutoff[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) uint32_t index_mask[8];
+ XNN_ALIGN(32) float minus_ln2_hi[8];
+ XNN_ALIGN(32) float minus_ln2_lo[8];
+ XNN_ALIGN(32) float c3[8];
+ XNN_ALIGN(32) float c2[8];
+ XNN_ALIGN(32) float one[8];
+ int32_t mask_table[14];
+ } avx_rr2_lut16_p3;
+ struct {
+ XNN_ALIGN(32) float prescale[8];
+ XNN_ALIGN(32) float alpha[8];
+ XNN_ALIGN(32) float beta[8];
+ XNN_ALIGN(32) float sat_cutoff[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) uint32_t index_mask[8];
+ // The lookup table is embedded directly in the parameters.
+ XNN_ALIGN(32) float table[8];
+ XNN_ALIGN(32) float minus_ln2_hi[8];
+ XNN_ALIGN(32) float minus_ln2_lo[8];
+ XNN_ALIGN(32) float c4[8];
+ XNN_ALIGN(32) float c3[8];
+ XNN_ALIGN(32) float c2[8];
+ XNN_ALIGN(32) float one[8];
+ int32_t mask_table[14];
+ } avx_rr2_lut4_p4;
+ struct {
+ XNN_ALIGN(32) float prescale[8];
+ XNN_ALIGN(32) float alpha[8];
+ XNN_ALIGN(32) float beta[8];
+ XNN_ALIGN(32) float sat_cutoff[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) float minus_ln2_hi[8];
+ XNN_ALIGN(32) float minus_ln2_lo[8];
+ XNN_ALIGN(32) float c6[8];
+ XNN_ALIGN(32) float c5[8];
+ XNN_ALIGN(32) float c4[8];
+ XNN_ALIGN(32) float c3[8];
+ XNN_ALIGN(32) float c2[8];
+ XNN_ALIGN(32) float one[8];
+ int32_t mask_table[14];
+ } avx_rr2_p6;
+ struct {
+ XNN_ALIGN(32) float prescale[8];
+ XNN_ALIGN(32) float alpha[8];
+ XNN_ALIGN(32) float beta[8];
+ XNN_ALIGN(32) float sat_cutoff[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) uint32_t index_mask[8];
+ XNN_ALIGN(32) float minus_ln2[8];
+ XNN_ALIGN(32) float c3[8];
+ XNN_ALIGN(32) float c2[8];
+ int32_t mask_table[14];
+ } avx2_rr1_lut16_p3;
+ struct {
+ XNN_ALIGN(32) float prescale[8];
+ XNN_ALIGN(32) float alpha[8];
+ XNN_ALIGN(32) float beta[8];
+ XNN_ALIGN(32) float sat_cutoff[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ // Table entries are stored as 32-bit integers here, whereas the lut4
+ // variants store their table as float.
+ XNN_ALIGN(32) uint32_t table[8];
+ XNN_ALIGN(32) float minus_ln2[8];
+ XNN_ALIGN(32) float c4[8];
+ XNN_ALIGN(32) float c3[8];
+ XNN_ALIGN(32) float c2[8];
+ int32_t mask_table[14];
+ } avx2_rr1_lut8_p4;
+ struct {
+ XNN_ALIGN(32) float prescale[8];
+ XNN_ALIGN(32) float alpha[8];
+ XNN_ALIGN(32) float beta[8];
+ XNN_ALIGN(32) float sat_cutoff[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) float table[8];
+ XNN_ALIGN(32) float minus_ln2[8];
+ XNN_ALIGN(32) float c4[8];
+ XNN_ALIGN(32) float c3[8];
+ XNN_ALIGN(32) float c2[8];
+ int32_t mask_table[14];
+ } avx2_rr1_lut4_p4;
+ struct {
+ XNN_ALIGN(32) float prescale[8];
+ XNN_ALIGN(32) float alpha[8];
+ XNN_ALIGN(32) float beta[8];
+ XNN_ALIGN(32) float sat_cutoff[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) float minus_ln2[8];
+ XNN_ALIGN(32) float c6[8];
+ XNN_ALIGN(32) float c5[8];
+ XNN_ALIGN(32) float c4[8];
+ XNN_ALIGN(32) float c3[8];
+ XNN_ALIGN(32) float c2[8];
+ int32_t mask_table[14];
+ } avx2_rr1_p6;
+ // AVX512 variants hold one scalar per constant instead of pre-replicated
+ // vectors; only the 16-entry table is stored as a 64-byte-aligned block.
+ struct {
+ float prescale;
+ float alpha;
+ float beta;
+ float sat_cutoff;
+ float magic_bias;
+ float log2e;
+ float minus_ln2;
+ float c3;
+ float c2;
+ XNN_ALIGN(64) uint32_t table[16];
+ } avx512_rr1_lut16_p3;
+ struct {
+ float prescale;
+ float alpha;
+ float beta;
+ float sat_cutoff;
+ float magic_bias;
+ float log2e;
+ float minus_ln2;
+ float c6;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ } avx512_rr1_p6;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ // WAsm SIMD variants: every constant is one 8-byte-aligned block of two
+ // lanes.
+ struct {
+ XNN_ALIGN(8) float prescale[2];
+ XNN_ALIGN(8) float alpha[2];
+ XNN_ALIGN(8) float beta[2];
+ XNN_ALIGN(8) float sat_cutoff[2];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) float log2e[2];
+ XNN_ALIGN(8) uint32_t index_mask[2];
+ XNN_ALIGN(8) float minus_ln2_hi[2];
+ XNN_ALIGN(8) float minus_ln2_lo[2];
+ XNN_ALIGN(8) float c3[2];
+ XNN_ALIGN(8) float c2[2];
+ XNN_ALIGN(8) float one[2];
+ } wasmsimd_rr2_lut16_p3;
+ struct {
+ XNN_ALIGN(8) float prescale[2];
+ XNN_ALIGN(8) float alpha[2];
+ XNN_ALIGN(8) float beta[2];
+ XNN_ALIGN(8) float sat_cutoff[2];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) float log2e[2];
+ XNN_ALIGN(8) float minus_ln2_hi[2];
+ XNN_ALIGN(8) float minus_ln2_lo[2];
+ XNN_ALIGN(8) float c6[2];
+ XNN_ALIGN(8) float c5[2];
+ XNN_ALIGN(8) float c4[2];
+ XNN_ALIGN(8) float c3[2];
+ XNN_ALIGN(8) float c2[2];
+ XNN_ALIGN(8) float one[2];
+ } wasmsimd_rr2_p6;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// ExpMinus: used by RADDEXPMINUSMAX microkernels.
+
+// Parameters for the F16 ExpMinus microkernels, overlaid per target
+// architecture. There is no scalar variant; the dummy member keeps the union
+// non-empty on targets that are neither ARM nor x86.
+union xnn_f16_expminus_params {
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ // Constants are stored as raw 16-bit words (presumably IEEE half-precision
+ // bit patterns - confirm in init code).
+ struct {
+ uint16_t magic_bias;
+ uint16_t log2e;
+ uint16_t minus_ln2_hi;
+ uint16_t minus_ln2_lo;
+ uint16_t c2;
+ uint16_t c1;
+ uint16_t denorm_cutoff;
+ } neonfp16arith_rr2_p2;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ // Constants are stored as single-precision floats, eight per 32-byte-aligned
+ // block, even though the kernel operates on F16 data.
+ struct {
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) float minus_ln2[8];
+ XNN_ALIGN(32) float c2[8];
+ XNN_ALIGN(32) float c1[8];
+ XNN_ALIGN(32) float denorm_cutoff[8];
+ } avx2_rr1_p2;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+// Parameters for the F32 ExpMinus microkernels, overlaid per target
+// architecture and per algorithm variant. The two scalar variants are always
+// available; the NEON, SSE2/AVX2/AVX512 and WAsm SIMD variants exist only on
+// their respective targets.
+//
+// NOTE(review): "rr1"/"rr2" variants carry one (minus_ln2) or two
+// (minus_ln2_hi/minus_ln2_lo) reduction constants; "lut64" presumably names a
+// 64-entry lookup table kept outside these parameters - confirm in kernels.
+union xnn_f32_expminus_params {
+ struct {
+ float log2e;
+ float magic_bias;
+ float minus_ln2_hi;
+ float minus_ln2_lo;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ float c1;
+ float denorm_cutoff;
+ } scalar_rr2_p5;
+ struct {
+ float log2e;
+ float magic_bias;
+ float minus_ln2_hi;
+ float minus_ln2_lo;
+ float c2;
+ float denorm_cutoff;
+ } scalar_rr2_lut64_p2;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ float log2e;
+ float magic_bias;
+ float minus_ln2_hi;
+ float minus_ln2_lo;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ float c1;
+ float denorm_cutoff;
+ } neon_rr2_p5;
+ struct {
+ float log2e;
+ float magic_bias;
+ float minus_ln2_hi;
+ float minus_ln2_lo;
+ float c2;
+ float denorm_cutoff;
+ } neon_rr2_lut64_p2;
+ struct {
+ float log2e;
+ float magic_bias;
+ float minus_ln2;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ float c1;
+ float denorm_cutoff;
+ } neonfma_rr1_p5;
+ struct {
+ float log2e;
+ float magic_bias;
+ float minus_ln2;
+ float c2;
+ float denorm_cutoff;
+ } neonfma_rr1_lut64_p2;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ XNN_ALIGN(16) float log2e[4];
+ XNN_ALIGN(16) float magic_bias[4];
+ XNN_ALIGN(16) float minus_ln2_hi[4];
+ XNN_ALIGN(16) float minus_ln2_lo[4];
+ XNN_ALIGN(16) float c5[4];
+ XNN_ALIGN(16) float c4[4];
+ XNN_ALIGN(16) float c3[4];
+ XNN_ALIGN(16) float c2[4];
+ XNN_ALIGN(16) float c1[4];
+ XNN_ALIGN(16) float denorm_cutoff[4];
+ } sse2_rr2_p5;
+ struct {
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float minus_ln2[8];
+ XNN_ALIGN(32) float c5[8];
+ XNN_ALIGN(32) float c4[8];
+ XNN_ALIGN(32) float c3[8];
+ XNN_ALIGN(32) float c2[8];
+ XNN_ALIGN(32) float c1[8];
+ XNN_ALIGN(32) float denorm_cutoff[8];
+ // NOTE(review): presumably used to build lane masks for partial
+ // (remainder) vectors - confirm in kernels.
+ int32_t mask_table[14];
+ } avx2_rr1_p5;
+ // Scalar constants only. Unlike every other variant in this union, this one
+ // has no magic_bias or denorm_cutoff field and adds a c0 coefficient.
+ struct {
+ float log2e;
+ float minus_ln2;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ float c1;
+ float c0;
+ } avx512_rr1_p5;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct {
+ XNN_ALIGN(8) float log2e[2];
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) float minus_ln2_hi[2];
+ XNN_ALIGN(8) float minus_ln2_lo[2];
+ XNN_ALIGN(8) float c5[2];
+ XNN_ALIGN(8) float c4[2];
+ XNN_ALIGN(8) float c3[2];
+ XNN_ALIGN(8) float c2[2];
+ XNN_ALIGN(8) float c1[2];
+ XNN_ALIGN(8) float denorm_cutoff[2];
+ } wasmsimd_rr2_p5;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// HSwish: used by VHSWISH microkernels.
+
+// Parameters for the F16 HSwish microkernels, overlaid per target
+// architecture. There is no scalar variant: both data-carrying members are
+// architecture-guarded, so the dummy member is required to keep the union
+// non-empty (and therefore valid ISO C) on targets that are neither ARM nor
+// x86, matching the other F16 parameter unions in this file.
+union xnn_f16_hswish_params {
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ // Constants are stored as raw 16-bit words (presumably IEEE half-precision
+ // bit patterns - confirm in init code).
+ struct {
+ uint16_t sixth;
+ uint16_t three;
+ uint16_t six;
+ uint16_t pad; // pad to 8 bytes for neonfp16arith assembly.
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ // sixth and three are single-precision vectors (eight lanes, 32-byte
+ // aligned), whereas six is kept as eight 16-bit lanes (16-byte aligned).
+ struct {
+ XNN_ALIGN(32) float sixth[8];
+ XNN_ALIGN(32) float three[8];
+ XNN_ALIGN(16) uint16_t six[8];
+ } avx;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+// Parameters for the F32 HSwish microkernels, overlaid per target
+// architecture. The scalar variant is always available. Two constant sets are
+// in use: (sixth, three, six) in scalar and wasmsimd, and (sixth, half, one)
+// in the x86 variants.
+union xnn_f32_hswish_params {
+ struct {
+ float sixth;
+ float three;
+ float six;
+ } scalar;
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ XNN_ALIGN(16) float sixth[4];
+ XNN_ALIGN(16) float half[4];
+ XNN_ALIGN(16) float one[4];
+ } sse;
+ struct {
+ XNN_ALIGN(32) float sixth[8];
+ XNN_ALIGN(32) float half[8];
+ XNN_ALIGN(32) float one[8];
+ // NOTE(review): presumably used to build lane masks for partial
+ // (remainder) vectors - confirm in kernels.
+ int32_t mask_table[14];
+ } avx;
+ // Scalar constants rather than pre-replicated vectors.
+ struct {
+ float sixth;
+ float half;
+ float one;
+ } avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct {
+ XNN_ALIGN(8) float sixth[2];
+ XNN_ALIGN(8) float three[2];
+ XNN_ALIGN(8) float six[2];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// LReLU (Leaky ReLU): used by VLRELU microkernels.
+
+// Parameters for the F16 LReLU (Leaky ReLU) microkernels, overlaid per target
+// architecture. There is no scalar variant: both data-carrying members are
+// architecture-guarded, so the dummy member is required to keep the union
+// non-empty (and therefore valid ISO C) on targets that are neither ARM nor
+// x86, matching the other F16 parameter unions in this file.
+union xnn_f16_lrelu_params {
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ // Raw 16-bit word (presumably an IEEE half-precision bit pattern - confirm
+ // in init code).
+ uint16_t slope;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ // Stored as single-precision floats even though the kernel operates on
+ // F16 data.
+ XNN_ALIGN(32) float slope[8];
+ } avx;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+// Parameters for the F32 LReLU (Leaky ReLU) microkernels, overlaid per target
+// architecture. Every variant carries the slope; the scalar variant is always
+// available, the vector variants store it pre-replicated across lanes.
+union xnn_f32_lrelu_params {
+ struct {
+ float slope;
+ } scalar;
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ XNN_ALIGN(16) float slope[4];
+ } sse;
+ struct {
+ XNN_ALIGN(32) float slope[8];
+ // NOTE(review): presumably used to build lane masks for partial
+ // (remainder) vectors - confirm in kernels.
+ int32_t mask_table[14];
+ } avx;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct {
+ XNN_ALIGN(8) float slope[2];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+// Parameters for the QS8 LReLU (Leaky ReLU) microkernels, overlaid per target
+// architecture. Two multiplier encodings are in use:
+// (positive_multiplier, negative_multiplier) in scalar_select, armv6simd,
+// neon, avx, avx2 and wasmsimd_arm, and (multiplier_diff, multiplier_base) in
+// scalar_andxor, sse2 and wasmsimd_x86. The two scalar variants are always
+// available.
+union xnn_qs8_lrelu_params {
+ struct {
+ int32_t input_zero_point;
+ int32_t positive_multiplier;
+ int32_t negative_multiplier;
+ int32_t bias;
+ } scalar_select;
+ struct {
+ int32_t input_zero_point;
+ int32_t multiplier_diff;
+ int32_t multiplier_base;
+ int32_t bias;
+ } scalar_andxor;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ // NOTE(review): the unsigned 32-bit fields presumably hold packed 16-bit
+ // halves for ARMv6 SIMD instructions - confirm in init code.
+ struct {
+ uint32_t input_zero_point;
+ uint32_t positive_multiplier;
+ uint32_t negative_multiplier;
+ int32_t bias;
+ } armv6simd;
+ struct {
+ int16_t input_zero_point;
+ int16_t positive_multiplier;
+ int16_t negative_multiplier;
+ int16_t output_zero_point;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ XNN_ALIGN(16) int16_t input_zero_point[8];
+ XNN_ALIGN(16) int16_t multiplier_diff[8];
+ XNN_ALIGN(16) int16_t multiplier_base[8];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ } sse2;
+ struct {
+ XNN_ALIGN(16) int16_t input_zero_point[8];
+ XNN_ALIGN(16) int16_t positive_multiplier[8];
+ XNN_ALIGN(16) int16_t negative_multiplier[8];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ } avx;
+ struct {
+ XNN_ALIGN(32) int16_t input_zero_point[16];
+ XNN_ALIGN(32) int16_t positive_multiplier[16];
+ XNN_ALIGN(32) int16_t negative_multiplier[16];
+ XNN_ALIGN(32) int16_t output_zero_point[16];
+ } avx2;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ // Two WAsm SIMD layouts: wasmsimd_arm has the same fields as the neon
+ // variant, wasmsimd_x86 the same fields as the sse2 variant.
+ struct {
+ XNN_ALIGN(8) int16_t input_zero_point[4];
+ XNN_ALIGN(8) int16_t positive_multiplier[4];
+ XNN_ALIGN(8) int16_t negative_multiplier[4];
+ XNN_ALIGN(8) int16_t output_zero_point[4];
+ } wasmsimd_arm;
+ struct {
+ XNN_ALIGN(8) int16_t input_zero_point[4];
+ XNN_ALIGN(8) int16_t multiplier_diff[4];
+ XNN_ALIGN(8) int16_t multiplier_base[4];
+ XNN_ALIGN(8) int16_t output_zero_point[4];
+ } wasmsimd_x86;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+// Parameters for the QU8 LReLU (Leaky ReLU) microkernels, overlaid per target
+// architecture. Same variant set and multiplier encodings as
+// xnn_qs8_lrelu_params. The two scalar variants are always available.
+union xnn_qu8_lrelu_params {
+ struct {
+ int32_t input_zero_point;
+ int32_t positive_multiplier;
+ int32_t negative_multiplier;
+ int32_t bias;
+ } scalar_select;
+ // NOTE(review): multiplier_base precedes multiplier_diff here, the reverse
+ // of xnn_qs8_lrelu_params.scalar_andxor. Harmless as long as the fields are
+ // only accessed by name - confirm no code relies on matching layouts.
+ struct {
+ int32_t input_zero_point;
+ int32_t multiplier_base;
+ int32_t multiplier_diff;
+ int32_t bias;
+ } scalar_andxor;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ // NOTE(review): the unsigned 32-bit fields presumably hold packed 16-bit
+ // halves for ARMv6 SIMD instructions - confirm in init code.
+ struct {
+ uint32_t input_zero_point;
+ uint32_t positive_multiplier;
+ uint32_t negative_multiplier;
+ int32_t bias;
+ } armv6simd;
+ struct {
+ uint16_t input_zero_point;
+ int16_t positive_multiplier;
+ int16_t negative_multiplier;
+ int16_t output_zero_point;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ // The x86 and WAsm SIMD variants declare input_zero_point as signed 16-bit
+ // lanes, whereas the neon variant above uses uint16_t.
+ struct {
+ XNN_ALIGN(16) int16_t input_zero_point[8];
+ XNN_ALIGN(16) int16_t multiplier_diff[8];
+ XNN_ALIGN(16) int16_t multiplier_base[8];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ } sse2;
+ struct {
+ XNN_ALIGN(16) int16_t input_zero_point[8];
+ XNN_ALIGN(16) int16_t positive_multiplier[8];
+ XNN_ALIGN(16) int16_t negative_multiplier[8];
+ XNN_ALIGN(16) int16_t output_zero_point[8];
+ } avx;
+ struct {
+ XNN_ALIGN(32) int16_t input_zero_point[16];
+ XNN_ALIGN(32) int16_t positive_multiplier[16];
+ XNN_ALIGN(32) int16_t negative_multiplier[16];
+ XNN_ALIGN(32) int16_t output_zero_point[16];
+ } avx2;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct {
+ XNN_ALIGN(8) int16_t input_zero_point[4];
+ XNN_ALIGN(8) int16_t positive_multiplier[4];
+ XNN_ALIGN(8) int16_t negative_multiplier[4];
+ XNN_ALIGN(8) int16_t output_zero_point[4];
+ } wasmsimd_arm;
+ struct {
+ XNN_ALIGN(8) int16_t input_zero_point[4];
+ XNN_ALIGN(8) int16_t multiplier_diff[4];
+ XNN_ALIGN(8) int16_t multiplier_base[4];
+ XNN_ALIGN(8) int16_t output_zero_point[4];
+ } wasmsimd_x86;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// Neg: used by VNEG microkernels.
+
+// Parameters for the F16 Neg microkernels, overlaid per target architecture.
+// Only x86/x86-64 builds add a data-carrying variant (sse); on every other
+// target the union holds just the dummy member.
+union xnn_f16_neg_params {
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ // Eight 16-bit lanes, 16-byte aligned. Presumably a mask selecting the sign
+ // bit of each half-precision value - confirm in init code.
+ XNN_ALIGN(16) uint16_t sign_mask[8];
+ } sse;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+// Parameters for the F32 Neg microkernels, overlaid per target architecture.
+// Variants: sse, avx, avx512 (x86/x86-64 builds) and wasmsimd (WAsm SIMD
+// builds). The dummy member keeps the union non-empty on other targets.
+union xnn_f32_neg_params {
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ XNN_ALIGN(16) float sign_mask[4];
+ } sse;
+ struct {
+ XNN_ALIGN(32) float sign_mask[8];
+ // NOTE(review): presumably used to build lane masks for partial
+ // (remainder) vectors - confirm in kernels.
+ int32_t mask_table[14];
+ } avx;
+ struct {
+ // Single 32-bit value rather than a pre-replicated vector.
+ uint32_t sign_mask;
+ } avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct {
+ XNN_ALIGN(8) float sign_mask[2];
+ } wasmsimd;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// Rnd (Round): used by VRNDNE/VRNDU/VRNDD/VRNDZ microkernels.
+
+// Parameters for the F16 Rnd (Round) microkernels. No variant carries any
+// data; the union exists only as a placeholder type and holds just the dummy
+// member on every target.
+union xnn_f16_rnd_params {
+ char _; // Dummy member variable to comply with the C standard
+};
+
+// Parameters for the F32 Rnd (Round) microkernels, overlaid per target
+// architecture. Only x86/x86-64 builds add data-carrying variants (sse2, avx);
+// on every other target the union holds just the dummy member.
+union xnn_f32_rnd_params {
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ XNN_ALIGN(16) float sign_mask[4];
+ XNN_ALIGN(16) float one[4];
+ } sse2;
+ struct {
+ // NOTE(review): presumably used to build lane masks for partial
+ // (remainder) vectors - confirm in kernels.
+ int32_t mask_table[14];
+ } avx;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+
+// Sigmoid: used by VSIGMOID microkernels.
+
+// Parameters for the F16 Sigmoid microkernels, overlaid per target
+// architecture. There is no scalar variant; the dummy member keeps the union
+// non-empty on targets that are neither ARM nor x86.
+union xnn_f16_sigmoid_params {
+ char _; // Dummy member variable to comply with the C standard
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ // Constants are stored as raw 16-bit words (presumably IEEE half-precision
+ // bit patterns - confirm in init code). This variant stores minus_log2e and
+ // ln2_hi/ln2_lo, whereas the x86 variant below stores log2e and minus_ln2.
+ struct {
+ uint16_t magic_bias;
+ uint16_t minus_log2e;
+ uint16_t ln2_hi;
+ uint16_t ln2_lo;
+ uint16_t c2;
+ uint16_t c1;
+ uint16_t denorm_cutoff;
+ } neonfp16arith_rr2_p2;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ // Constants are stored as single-precision floats, eight per 32-byte-aligned
+ // block, even though the kernel operates on F16 data.
+ struct {
+ XNN_ALIGN(32) float sign_mask[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) float minus_ln2[8];
+ XNN_ALIGN(32) float c2[8];
+ XNN_ALIGN(32) float c1[8];
+ XNN_ALIGN(32) float one[8];
+ XNN_ALIGN(32) float denorm_cutoff[8];
+ } avx2_rr1_p2;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+union xnn_f32_sigmoid_params { // Constants for the f32 VSIGMOID microkernels; each struct is the layout for one kernel variant.
+ struct { // The scalar variants are unconditional, so this union needs no placeholder member.
+ float magic_bias;
+ float minus_log2e;
+ float ln2_hi;
+ float ln2_lo;
+ float c1;
+ float one;
+ float denorm_cutoff;
+ } scalar_rr2_lut2048_p1;
+ struct {
+ float magic_bias;
+ float minus_log2e;
+ float ln2_hi;
+ float ln2_lo;
+ float c2;
+ float one;
+ float denorm_cutoff;
+ } scalar_rr2_lut64_p2;
+ struct { // NOTE(review): c5..c1 are presumably polynomial coefficients, highest degree first - confirm against the kernels.
+ float magic_bias;
+ float minus_log2e;
+ float ln2_hi;
+ float ln2_lo;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ float c1;
+ float one;
+ float denorm_cutoff;
+ } scalar_rr2_p5;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Same fields as scalar_rr2_lut2048_p1 except that "one" is not stored.
+ float magic_bias;
+ float minus_log2e;
+ float ln2_hi;
+ float ln2_lo;
+ float c1;
+ float denorm_cutoff;
+ } neon_rr2_lut2048_p1;
+ struct { // Same fields as scalar_rr2_lut64_p2 except that "one" is not stored.
+ float magic_bias;
+ float minus_log2e;
+ float ln2_hi;
+ float ln2_lo;
+ float c2;
+ float denorm_cutoff;
+ } neon_rr2_lut64_p2;
+ struct { // Same fields as scalar_rr2_p5 except that "one" is not stored.
+ float magic_bias;
+ float minus_log2e;
+ float ln2_hi;
+ float ln2_lo;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ float c1;
+ float denorm_cutoff;
+ } neon_rr2_p5;
+ struct { // The neonfma_rr1_* layouts carry a single ln2 where the neon_rr2_* layouts carry ln2_hi and ln2_lo.
+ float magic_bias;
+ float minus_log2e;
+ float ln2;
+ float c1;
+ float denorm_cutoff;
+ } neonfma_rr1_lut2048_p1;
+ struct {
+ float magic_bias;
+ float minus_log2e;
+ float ln2;
+ float c2;
+ float denorm_cutoff;
+ } neonfma_rr1_lut64_p2;
+ struct {
+ float magic_bias;
+ float minus_log2e;
+ float ln2;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ float c1;
+ float denorm_cutoff;
+ } neonfma_rr1_p5;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // SSE2/AVX layouts add sign_mask and use log2e / minus_ln2* where the scalar layouts use minus_log2e / ln2*.
+ XNN_ALIGN(16) float sign_mask[4];
+ XNN_ALIGN(16) float magic_bias[4];
+ XNN_ALIGN(16) float log2e[4];
+ XNN_ALIGN(16) uint32_t index_mask[4]; // The only integer-typed constant in this layout
+ XNN_ALIGN(16) float minus_ln2_hi[4];
+ XNN_ALIGN(16) float minus_ln2_lo[4];
+ XNN_ALIGN(16) float c2[4];
+ XNN_ALIGN(16) float one[4];
+ XNN_ALIGN(16) float denorm_cutoff[4];
+ } sse2_rr2_lut64_p2;
+ struct {
+ XNN_ALIGN(16) float sign_mask[4];
+ XNN_ALIGN(16) float magic_bias[4];
+ XNN_ALIGN(16) float log2e[4];
+ XNN_ALIGN(16) float minus_ln2_hi[4];
+ XNN_ALIGN(16) float minus_ln2_lo[4];
+ XNN_ALIGN(16) float c5[4];
+ XNN_ALIGN(16) float c4[4];
+ XNN_ALIGN(16) float c3[4];
+ XNN_ALIGN(16) float c2[4];
+ XNN_ALIGN(16) float c1[4];
+ XNN_ALIGN(16) float one[4];
+ XNN_ALIGN(16) float denorm_cutoff[4];
+ } sse2_rr2_p5;
+ struct { // Unlike the other p5 layouts in this union, this one also stores "two".
+ XNN_ALIGN(32) float sign_mask[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) float minus_ln2_hi[8];
+ XNN_ALIGN(32) float minus_ln2_lo[8];
+ XNN_ALIGN(32) float c5[8];
+ XNN_ALIGN(32) float c4[8];
+ XNN_ALIGN(32) float c3[8];
+ XNN_ALIGN(32) float c2[8];
+ XNN_ALIGN(32) float c1[8];
+ XNN_ALIGN(32) float one[8];
+ XNN_ALIGN(32) float two[8];
+ XNN_ALIGN(32) float denorm_cutoff[8];
+ int32_t mask_table[14]; // NOTE(review): presumably lane masks for partial-vector tails - confirm against the AVX kernels
+ } avx_rr2_p5;
+ struct {
+ XNN_ALIGN(32) float sign_mask[8];
+ XNN_ALIGN(32) float magic_bias[8];
+ XNN_ALIGN(32) float log2e[8];
+ XNN_ALIGN(32) float minus_ln2[8];
+ XNN_ALIGN(32) float c5[8];
+ XNN_ALIGN(32) float c4[8];
+ XNN_ALIGN(32) float c3[8];
+ XNN_ALIGN(32) float c2[8];
+ XNN_ALIGN(32) float c1[8];
+ XNN_ALIGN(32) float one[8];
+ XNN_ALIGN(32) float denorm_cutoff[8];
+ int32_t mask_table[14]; // NOTE(review): presumably lane masks for partial-vector tails - confirm against the AVX2 kernels
+ } avx2_rr1_p5;
+ struct { // AVX512 layouts keep one scalar per constant; only the lookup tables are arrays.
+ uint32_t sign_mask;
+ float magic_bias;
+ float log2e;
+ float minus_ln2;
+ float c3;
+ float c2;
+ float one;
+ XNN_ALIGN(64) float table[16];
+ } avx512_rr1_lut16_p3;
+ struct {
+ uint32_t sign_mask;
+ float magic_bias;
+ float log2e;
+ float minus_ln2_hi;
+ float minus_ln2_lo;
+ float c2;
+ float c1;
+ float one;
+ XNN_ALIGN(64) float table_lo[16]; // The table is split into two 16-entry halves, table_lo and table_hi
+ XNN_ALIGN(64) float table_hi[16];
+ } avx512_rr2_lut32_p2;
+ struct { // This layout has no magic_bias and no denorm_cutoff field.
+ uint32_t sign_mask;
+ float log2e;
+ float minus_ln2;
+ float c5;
+ float c4;
+ float c3;
+ float c2;
+ float c1;
+ float one;
+ } avx512_rr1_p5;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+ struct { // WAsm SIMD layouts store each constant as a 2-element array declared with XNN_ALIGN(8).
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) float minus_log2e[2];
+ XNN_ALIGN(8) uint32_t index_mask[2];
+ XNN_ALIGN(8) float ln2_hi[2];
+ XNN_ALIGN(8) float ln2_lo[2];
+ XNN_ALIGN(8) float c2[2];
+ XNN_ALIGN(8) float one[2];
+ XNN_ALIGN(8) float denorm_cutoff[2];
+ } wasmsimd_rr2_lut64_p2;
+ struct {
+ XNN_ALIGN(8) float magic_bias[2];
+ XNN_ALIGN(8) float minus_log2e[2];
+ XNN_ALIGN(8) float ln2_hi[2];
+ XNN_ALIGN(8) float ln2_lo[2];
+ XNN_ALIGN(8) float c5[2];
+ XNN_ALIGN(8) float c4[2];
+ XNN_ALIGN(8) float c3[2];
+ XNN_ALIGN(8) float c2[2];
+ XNN_ALIGN(8) float c1[2];
+ XNN_ALIGN(8) float one[2];
+ XNN_ALIGN(8) float denorm_cutoff[2];
+ } wasmsimd_rr2_p5;
+#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// Sqrt (Square Root): used by VSQRT microkernels.
+
+union xnn_f16_sqrt_params { // f16 parameters for the VSQRT microkernels; no per-architecture fields are declared here.
+ char _; // Placeholder member: ISO C requires a union to declare at least one member
+};
+
+union xnn_f32_sqrt_params { // f32 parameters for the VSQRT microkernels; variant layouts exist only for x86/x86-64.
+ char _; // Placeholder member: keeps the union non-empty when the x86 block below is compiled out
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct {
+ int32_t mask_table[14]; // NOTE(review): presumably lane masks for partial-vector tails - confirm against the AVX kernels
+ } avx;
+ struct {
+ XNN_ALIGN(32) float half[8]; // NOTE(review): presumably the constant 0.5 in every lane - confirm against the params-init code
+ int32_t mask_table[14];
+ } fma;
+ struct {
+ float half; // Stored as a single scalar here, unlike the 8-element array in the fma layout
+ } avx512;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+
+// CHW: used by CONV/DWCONV microkernels in CHW layout with Min+Max parameters.
+
+union xnn_f16_chw_params { // f16 Min+Max parameters for the CONV/DWCONV microkernels in CHW layout; a variant layout exists only for ARM/ARM64.
+ char _; // Placeholder member: keeps the union non-empty when the ARM block below is compiled out
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ uint16_t min; // NOTE(review): presumably a raw half-precision bit pattern - confirm against the params-init code
+ uint16_t max; // NOTE(review): presumably a raw half-precision bit pattern - confirm against the params-init code
+ XNN_ALIGN(8) uint16_t mask_even[4]; // used by stride 2 kernels
+ XNN_ALIGN(8) uint16_t mask_odd[4]; // used by stride 2 kernels
+ XNN_ALIGN(8) uint16_t mask[4]; // used by stride 1 kernels
+ XNN_ALIGN(16) uint16_t maskx8[8]; // used by stride 1 x8 kernels
+ } neonfp16arith;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+};
+
+union xnn_f32_chw_params { // f32 Min+Max parameters for the CONV/DWCONV microkernels in CHW layout; one layout per kernel family.
+ struct { // Unconditional layout: masks are signed int32_t and come before min/max, unlike the neon and sse layouts.
+ XNN_ALIGN(16) int32_t mask_even[4]; // used by stride 2 kernels
+ XNN_ALIGN(16) int32_t mask_odd[4]; // used by stride 2 kernels
+ XNN_ALIGN(16) int32_t mask[4]; // used by stride 1 kernels
+ float min;
+ float max;
+ } scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // min/max are single scalars placed ahead of the unsigned masks.
+ float min;
+ float max;
+ XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
+ XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels
+ XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // min/max are 4-element arrays declared with XNN_ALIGN(16), like the masks.
+ XNN_ALIGN(16) float min[4];
+ XNN_ALIGN(16) float max[4];
+ XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
+ XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels
+ XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels
+ } sse;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+
+// GAvgPool (Global Average Pool): used by GAVGPOOL microkernels in CHW layout with Scale+Min+Max parameters.
+
+union xnn_f16_gavgpool_params { // f16 Scale+Min+Max parameters for the GAVGPOOL microkernels in CHW layout; a variant layout exists only for ARM/ARM64.
+ char _; // Placeholder member: keeps the union non-empty when the ARM block below is compiled out
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct {
+ XNN_ALIGN(16) uint16_t mask[8];
+ uint16_t multiplier; // NOTE(review): presumably the "Scale" parameter as a raw half-precision bit pattern - confirm
+ uint16_t output_min;
+ uint16_t output_max;
+ } neonfp16arith;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+};
+
+union xnn_f32_gavgpool_params { // f32 Scale+Min+Max parameters for the GAVGPOOL microkernels in CHW layout; one layout per kernel family.
+ struct { // Unconditional layout: the mask is signed int32_t here, unsigned in the sse and neon layouts.
+ XNN_ALIGN(16) int32_t mask[4];
+ float multiplier; // NOTE(review): presumably the "Scale" parameter - confirm against the params-init code
+ float output_min;
+ float output_max;
+ } scalar;
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ struct { // Every field is a 4-element array declared with XNN_ALIGN(16); the mask comes last.
+ XNN_ALIGN(16) float multiplier[4];
+ XNN_ALIGN(16) float output_min[4];
+ XNN_ALIGN(16) float output_max[4];
+ XNN_ALIGN(16) uint32_t mask[4];
+ } sse;
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ struct { // Same field order as the scalar layout, with an unsigned mask.
+ XNN_ALIGN(16) uint32_t mask[4];
+ float multiplier;
+ float output_min;
+ float output_max;
+ } neon;
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+};
+
+// Forward declare for use in microkernel headers for JIT generator functions.
+struct xnn_code_buffer;
+
+// JIT GEMM: used by GEMM/IGEMM microkernel generators.
+
+struct jit_gemm_params { // Parameters used by the JIT GEMM/IGEMM microkernel generators.
+ struct { // The only parameter group declared so far.
+ float min; // NOTE(review): presumably the lower output clamp bound - confirm against the generators
+ float max; // NOTE(review): presumably the upper output clamp bound - confirm against the generators
+ } f32_minmax;
+};
diff --git a/src/xnnpack/pad.h b/src/xnnpack/pad.h
index 835c38cc4..c4ea02578 100644
--- a/src/xnnpack/pad.h
+++ b/src/xnnpack/pad.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h
index 5216d2ca9..a596b9b24 100644
--- a/src/xnnpack/params.h
+++ b/src/xnnpack/params.h
@@ -14,2413 +14,9 @@
#include <xnnpack.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
-union xnn_f16_default_params {
- // Empty; serves to differentiate pointer types for micro-kernels without fused activation.
- char _; // Dummy member variable to comply with the C standard
-};
-
-// scaleminmax is used for avgpool ukernels.
-union xnn_f16_scaleminmax_params {
- // Empty; serves to differentiate pointer types for micro-kernels without fused activation.
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- uint16_t scale;
- uint16_t min;
- uint16_t max;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(32) float scale[8];
- XNN_ALIGN(32) float min[8];
- XNN_ALIGN(32) float max[8];
- } avx;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f16_minmax_params {
- // Empty; serves to differentiate pointer types for micro-kernels without fused activation.
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- uint16_t min;
- uint16_t max;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(32) float min[8];
- XNN_ALIGN(32) float max[8];
- } avx;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f32_default_params {
- // Empty; serves to differentiate pointer types for micro-kernels without fused activation.
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- int32_t mask_table[14];
- } avx;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f32_relu_params {
- // Empty; serves to differentiate pointer types for micro-kernels with different fused activations.
- char _; // Dummy member variable to comply with the C standard
-};
-
-union xnn_f32_minmax_params {
- struct {
- float min;
- float max;
- } scalar;
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) float min[4];
- XNN_ALIGN(16) float max[4];
- } sse;
- struct {
- XNN_ALIGN(32) float min[8];
- XNN_ALIGN(32) float max[8];
- int32_t mask_table[14];
- } avx;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) float min[2];
- XNN_ALIGN(8) float max[2];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f32_abs_params {
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) float nonsign_mask[4];
- } sse;
- struct {
- XNN_ALIGN(32) float nonsign_mask[8];
- int32_t mask_table[14];
- } avx;
- struct {
- uint32_t nonsign_mask;
- } avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) float nonsign_mask[2];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f32_neg_params {
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) float sign_mask[4];
- } sse;
- struct {
- XNN_ALIGN(32) float sign_mask[8];
- int32_t mask_table[14];
- } avx;
- struct {
- uint32_t sign_mask;
- } avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) float sign_mask[2];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f16_abs_params {
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) uint16_t nonsign_mask[8];
- } sse;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f16_neg_params {
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) uint16_t sign_mask[8];
- } sse;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f16_rnd_params {
- char _; // Dummy member variable to comply with the C standard
-};
-
-union xnn_f32_rnd_params {
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) float sign_mask[4];
- XNN_ALIGN(16) float one[4];
- } sse2;
- struct {
- int32_t mask_table[14];
- } avx;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f16_elu_params {
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- uint16_t prescale;
- uint16_t sat_cutoff;
- uint16_t magic_bias;
- uint16_t log2e;
- uint16_t minus_ln2;
- uint16_t c3;
- uint16_t c2;
- uint16_t minus_alpha;
- uint16_t beta;
- } neonfp16arith_rr1_p3;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(32) float prescale[8];
- XNN_ALIGN(32) float sat_cutoff[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) float minus_ln2[8];
- XNN_ALIGN(32) float c3[8];
- XNN_ALIGN(32) float c2[8];
- XNN_ALIGN(32) float c1[8];
- XNN_ALIGN(32) float alpha[8];
- XNN_ALIGN(32) float beta[8];
- } avx2_rr1_p3;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f32_elu_params {
- struct {
- float prescale;
- float alpha;
- float beta;
- float sat_cutoff;
- float magic_bias;
- float log2e;
- float minus_ln2_hi;
- float minus_ln2_lo;
- float c3;
- float c2;
- float one;
- } scalar_rr2_lut16_p3;
- struct {
- float prescale;
- float alpha;
- float beta;
- float sat_cutoff;
- float magic_bias;
- float log2e;
- float minus_ln2_hi;
- float minus_ln2_lo;
- float c6;
- float c5;
- float c4;
- float c3;
- float c2;
- float one;
- } scalar_rr2_p6;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- float prescale;
- float alpha;
- float beta;
- float sat_cutoff;
- float magic_bias;
- float log2e;
- float minus_ln2_hi;
- float minus_ln2_lo;
- float c6;
- float c5;
- float c4;
- float c3;
- float c2;
- } neon_rr2_p6;
- struct {
- float prescale;
- float alpha;
- float beta;
- float sat_cutoff;
- float magic_bias;
- float log2e;
- float minus_ln2_hi;
- float minus_ln2_lo;
- float c3;
- float c2;
- } neon_rr2_lut16_p3;
- struct {
- float prescale;
- float alpha;
- float beta;
- float sat_cutoff;
- float magic_bias;
- float log2e;
- float minus_ln2;
- float c6;
- float c5;
- float c4;
- float c3;
- float c2;
- } neonfma_rr1_p6;
- struct {
- float prescale;
- float alpha;
- float beta;
- float sat_cutoff;
- float magic_bias;
- float log2e;
- float minus_ln2;
- float c3;
- float c2;
- } neonfma_rr1_lut16_p3;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) float prescale[4];
- XNN_ALIGN(16) float alpha[4];
- XNN_ALIGN(16) float beta[4];
- XNN_ALIGN(16) float sat_cutoff[4];
- XNN_ALIGN(16) float magic_bias[4];
- XNN_ALIGN(16) float log2e[4];
- XNN_ALIGN(16) uint32_t index_mask[4];
- XNN_ALIGN(16) float minus_ln2_hi[4];
- XNN_ALIGN(16) float minus_ln2_lo[4];
- XNN_ALIGN(16) float c3[4];
- XNN_ALIGN(16) float c2[4];
- XNN_ALIGN(16) float one[4];
- } sse2_rr2_lut16_p3;
- struct {
- XNN_ALIGN(16) float prescale[4];
- XNN_ALIGN(16) float alpha[4];
- XNN_ALIGN(16) float beta[4];
- XNN_ALIGN(16) float sat_cutoff[4];
- XNN_ALIGN(16) float magic_bias[4];
- XNN_ALIGN(16) float log2e[4];
- XNN_ALIGN(16) float minus_ln2_hi[4];
- XNN_ALIGN(16) float minus_ln2_lo[4];
- XNN_ALIGN(16) float c6[4];
- XNN_ALIGN(16) float c5[4];
- XNN_ALIGN(16) float c4[4];
- XNN_ALIGN(16) float c3[4];
- XNN_ALIGN(16) float c2[4];
- XNN_ALIGN(16) float one[4];
- } sse2_rr2_p6;
- struct {
- XNN_ALIGN(32) float prescale[8];
- XNN_ALIGN(32) float alpha[8];
- XNN_ALIGN(32) float beta[8];
- XNN_ALIGN(32) float sat_cutoff[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) uint32_t index_mask[8];
- XNN_ALIGN(32) float minus_ln2_hi[8];
- XNN_ALIGN(32) float minus_ln2_lo[8];
- XNN_ALIGN(32) float c3[8];
- XNN_ALIGN(32) float c2[8];
- XNN_ALIGN(32) float one[8];
- int32_t mask_table[14];
- } avx_rr2_lut16_p3;
- struct {
- XNN_ALIGN(32) float prescale[8];
- XNN_ALIGN(32) float alpha[8];
- XNN_ALIGN(32) float beta[8];
- XNN_ALIGN(32) float sat_cutoff[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) uint32_t index_mask[8];
- XNN_ALIGN(32) float table[8];
- XNN_ALIGN(32) float minus_ln2_hi[8];
- XNN_ALIGN(32) float minus_ln2_lo[8];
- XNN_ALIGN(32) float c4[8];
- XNN_ALIGN(32) float c3[8];
- XNN_ALIGN(32) float c2[8];
- XNN_ALIGN(32) float one[8];
- int32_t mask_table[14];
- } avx_rr2_lut4_p4;
- struct {
- XNN_ALIGN(32) float prescale[8];
- XNN_ALIGN(32) float alpha[8];
- XNN_ALIGN(32) float beta[8];
- XNN_ALIGN(32) float sat_cutoff[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) float minus_ln2_hi[8];
- XNN_ALIGN(32) float minus_ln2_lo[8];
- XNN_ALIGN(32) float c6[8];
- XNN_ALIGN(32) float c5[8];
- XNN_ALIGN(32) float c4[8];
- XNN_ALIGN(32) float c3[8];
- XNN_ALIGN(32) float c2[8];
- XNN_ALIGN(32) float one[8];
- int32_t mask_table[14];
- } avx_rr2_p6;
- struct {
- XNN_ALIGN(32) float prescale[8];
- XNN_ALIGN(32) float alpha[8];
- XNN_ALIGN(32) float beta[8];
- XNN_ALIGN(32) float sat_cutoff[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) uint32_t index_mask[8];
- XNN_ALIGN(32) float minus_ln2[8];
- XNN_ALIGN(32) float c3[8];
- XNN_ALIGN(32) float c2[8];
- int32_t mask_table[14];
- } avx2_rr1_lut16_p3;
- struct {
- XNN_ALIGN(32) float prescale[8];
- XNN_ALIGN(32) float alpha[8];
- XNN_ALIGN(32) float beta[8];
- XNN_ALIGN(32) float sat_cutoff[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) uint32_t table[8];
- XNN_ALIGN(32) float minus_ln2[8];
- XNN_ALIGN(32) float c4[8];
- XNN_ALIGN(32) float c3[8];
- XNN_ALIGN(32) float c2[8];
- int32_t mask_table[14];
- } avx2_rr1_lut8_p4;
- struct {
- XNN_ALIGN(32) float prescale[8];
- XNN_ALIGN(32) float alpha[8];
- XNN_ALIGN(32) float beta[8];
- XNN_ALIGN(32) float sat_cutoff[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) float table[8];
- XNN_ALIGN(32) float minus_ln2[8];
- XNN_ALIGN(32) float c4[8];
- XNN_ALIGN(32) float c3[8];
- XNN_ALIGN(32) float c2[8];
- int32_t mask_table[14];
- } avx2_rr1_lut4_p4;
- struct {
- XNN_ALIGN(32) float prescale[8];
- XNN_ALIGN(32) float alpha[8];
- XNN_ALIGN(32) float beta[8];
- XNN_ALIGN(32) float sat_cutoff[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) float minus_ln2[8];
- XNN_ALIGN(32) float c6[8];
- XNN_ALIGN(32) float c5[8];
- XNN_ALIGN(32) float c4[8];
- XNN_ALIGN(32) float c3[8];
- XNN_ALIGN(32) float c2[8];
- int32_t mask_table[14];
- } avx2_rr1_p6;
- struct {
- float prescale;
- float alpha;
- float beta;
- float sat_cutoff;
- float magic_bias;
- float log2e;
- float minus_ln2;
- float c3;
- float c2;
- XNN_ALIGN(64) uint32_t table[16];
- } avx512_rr1_lut16_p3;
- struct {
- float prescale;
- float alpha;
- float beta;
- float sat_cutoff;
- float magic_bias;
- float log2e;
- float minus_ln2;
- float c6;
- float c5;
- float c4;
- float c3;
- float c2;
- } avx512_rr1_p6;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) float prescale[2];
- XNN_ALIGN(8) float alpha[2];
- XNN_ALIGN(8) float beta[2];
- XNN_ALIGN(8) float sat_cutoff[2];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) float log2e[2];
- XNN_ALIGN(8) uint32_t index_mask[2];
- XNN_ALIGN(8) float minus_ln2_hi[2];
- XNN_ALIGN(8) float minus_ln2_lo[2];
- XNN_ALIGN(8) float c3[2];
- XNN_ALIGN(8) float c2[2];
- XNN_ALIGN(8) float one[2];
- } wasmsimd_rr2_lut16_p3;
- struct {
- XNN_ALIGN(8) float prescale[2];
- XNN_ALIGN(8) float alpha[2];
- XNN_ALIGN(8) float beta[2];
- XNN_ALIGN(8) float sat_cutoff[2];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) float log2e[2];
- XNN_ALIGN(8) float minus_ln2_hi[2];
- XNN_ALIGN(8) float minus_ln2_lo[2];
- XNN_ALIGN(8) float c6[2];
- XNN_ALIGN(8) float c5[2];
- XNN_ALIGN(8) float c4[2];
- XNN_ALIGN(8) float c3[2];
- XNN_ALIGN(8) float c2[2];
- XNN_ALIGN(8) float one[2];
- } wasmsimd_rr2_p6;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f16_expminus_params {
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- uint16_t magic_bias;
- uint16_t log2e;
- uint16_t minus_ln2_hi;
- uint16_t minus_ln2_lo;
- uint16_t c2;
- uint16_t c1;
- uint16_t denorm_cutoff;
- } neonfp16arith_rr2_p2;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) float minus_ln2[8];
- XNN_ALIGN(32) float c2[8];
- XNN_ALIGN(32) float c1[8];
- XNN_ALIGN(32) float denorm_cutoff[8];
- } avx2_rr1_p2;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f32_expminus_params {
- struct {
- float log2e;
- float magic_bias;
- float minus_ln2_hi;
- float minus_ln2_lo;
- float c5;
- float c4;
- float c3;
- float c2;
- float c1;
- float denorm_cutoff;
- } scalar_rr2_p5;
- struct {
- float log2e;
- float magic_bias;
- float minus_ln2_hi;
- float minus_ln2_lo;
- float c2;
- float denorm_cutoff;
- } scalar_rr2_lut64_p2;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- float log2e;
- float magic_bias;
- float minus_ln2_hi;
- float minus_ln2_lo;
- float c5;
- float c4;
- float c3;
- float c2;
- float c1;
- float denorm_cutoff;
- } neon_rr2_p5;
- struct {
- float log2e;
- float magic_bias;
- float minus_ln2_hi;
- float minus_ln2_lo;
- float c2;
- float denorm_cutoff;
- } neon_rr2_lut64_p2;
- struct {
- float log2e;
- float magic_bias;
- float minus_ln2;
- float c5;
- float c4;
- float c3;
- float c2;
- float c1;
- float denorm_cutoff;
- } neonfma_rr1_p5;
- struct {
- float log2e;
- float magic_bias;
- float minus_ln2;
- float c2;
- float denorm_cutoff;
- } neonfma_rr1_lut64_p2;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) float log2e[4];
- XNN_ALIGN(16) float magic_bias[4];
- XNN_ALIGN(16) float minus_ln2_hi[4];
- XNN_ALIGN(16) float minus_ln2_lo[4];
- XNN_ALIGN(16) float c5[4];
- XNN_ALIGN(16) float c4[4];
- XNN_ALIGN(16) float c3[4];
- XNN_ALIGN(16) float c2[4];
- XNN_ALIGN(16) float c1[4];
- XNN_ALIGN(16) float denorm_cutoff[4];
- } sse2_rr2_p5;
- struct {
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float minus_ln2[8];
- XNN_ALIGN(32) float c5[8];
- XNN_ALIGN(32) float c4[8];
- XNN_ALIGN(32) float c3[8];
- XNN_ALIGN(32) float c2[8];
- XNN_ALIGN(32) float c1[8];
- XNN_ALIGN(32) float denorm_cutoff[8];
- int32_t mask_table[14];
- } avx2_rr1_p5;
- struct {
- float log2e;
- float minus_ln2;
- float c5;
- float c4;
- float c3;
- float c2;
- float c1;
- float c0;
- } avx512_rr1_p5;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) float log2e[2];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) float minus_ln2_hi[2];
- XNN_ALIGN(8) float minus_ln2_lo[2];
- XNN_ALIGN(8) float c5[2];
- XNN_ALIGN(8) float c4[2];
- XNN_ALIGN(8) float c3[2];
- XNN_ALIGN(8) float c2[2];
- XNN_ALIGN(8) float c1[2];
- XNN_ALIGN(8) float denorm_cutoff[2];
- } wasmsimd_rr2_p5;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f16_lrelu_params {
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- uint16_t slope;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(32) float slope[8];
- } avx;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f32_lrelu_params {
- struct {
- float slope;
- } scalar;
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) float slope[4];
- } sse;
- struct {
- XNN_ALIGN(32) float slope[8];
- int32_t mask_table[14];
- } avx;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) float slope[2];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f16_sigmoid_params {
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- uint16_t magic_bias;
- uint16_t minus_log2e;
- uint16_t ln2_hi;
- uint16_t ln2_lo;
- uint16_t c2;
- uint16_t c1;
- uint16_t denorm_cutoff;
- } neonfp16arith_rr2_p2;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(32) float sign_mask[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) float minus_ln2[8];
- XNN_ALIGN(32) float c2[8];
- XNN_ALIGN(32) float c1[8];
- XNN_ALIGN(32) float one[8];
- XNN_ALIGN(32) float denorm_cutoff[8];
- } avx2_rr1_p2;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f32_sigmoid_params {
- struct {
- float magic_bias;
- float minus_log2e;
- float ln2_hi;
- float ln2_lo;
- float c1;
- float one;
- float denorm_cutoff;
- } scalar_rr2_lut2048_p1;
- struct {
- float magic_bias;
- float minus_log2e;
- float ln2_hi;
- float ln2_lo;
- float c2;
- float one;
- float denorm_cutoff;
- } scalar_rr2_lut64_p2;
- struct {
- float magic_bias;
- float minus_log2e;
- float ln2_hi;
- float ln2_lo;
- float c5;
- float c4;
- float c3;
- float c2;
- float c1;
- float one;
- float denorm_cutoff;
- } scalar_rr2_p5;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- float magic_bias;
- float minus_log2e;
- float ln2_hi;
- float ln2_lo;
- float c1;
- float denorm_cutoff;
- } neon_rr2_lut2048_p1;
- struct {
- float magic_bias;
- float minus_log2e;
- float ln2_hi;
- float ln2_lo;
- float c2;
- float denorm_cutoff;
- } neon_rr2_lut64_p2;
- struct {
- float magic_bias;
- float minus_log2e;
- float ln2_hi;
- float ln2_lo;
- float c5;
- float c4;
- float c3;
- float c2;
- float c1;
- float denorm_cutoff;
- } neon_rr2_p5;
- struct {
- float magic_bias;
- float minus_log2e;
- float ln2;
- float c1;
- float denorm_cutoff;
- } neonfma_rr1_lut2048_p1;
- struct {
- float magic_bias;
- float minus_log2e;
- float ln2;
- float c2;
- float denorm_cutoff;
- } neonfma_rr1_lut64_p2;
- struct {
- float magic_bias;
- float minus_log2e;
- float ln2;
- float c5;
- float c4;
- float c3;
- float c2;
- float c1;
- float denorm_cutoff;
- } neonfma_rr1_p5;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) float sign_mask[4];
- XNN_ALIGN(16) float magic_bias[4];
- XNN_ALIGN(16) float log2e[4];
- XNN_ALIGN(16) uint32_t index_mask[4];
- XNN_ALIGN(16) float minus_ln2_hi[4];
- XNN_ALIGN(16) float minus_ln2_lo[4];
- XNN_ALIGN(16) float c2[4];
- XNN_ALIGN(16) float one[4];
- XNN_ALIGN(16) float denorm_cutoff[4];
- } sse2_rr2_lut64_p2;
- struct {
- XNN_ALIGN(16) float sign_mask[4];
- XNN_ALIGN(16) float magic_bias[4];
- XNN_ALIGN(16) float log2e[4];
- XNN_ALIGN(16) float minus_ln2_hi[4];
- XNN_ALIGN(16) float minus_ln2_lo[4];
- XNN_ALIGN(16) float c5[4];
- XNN_ALIGN(16) float c4[4];
- XNN_ALIGN(16) float c3[4];
- XNN_ALIGN(16) float c2[4];
- XNN_ALIGN(16) float c1[4];
- XNN_ALIGN(16) float one[4];
- XNN_ALIGN(16) float denorm_cutoff[4];
- } sse2_rr2_p5;
- struct {
- XNN_ALIGN(32) float sign_mask[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) float minus_ln2_hi[8];
- XNN_ALIGN(32) float minus_ln2_lo[8];
- XNN_ALIGN(32) float c5[8];
- XNN_ALIGN(32) float c4[8];
- XNN_ALIGN(32) float c3[8];
- XNN_ALIGN(32) float c2[8];
- XNN_ALIGN(32) float c1[8];
- XNN_ALIGN(32) float one[8];
- XNN_ALIGN(32) float two[8];
- XNN_ALIGN(32) float denorm_cutoff[8];
- int32_t mask_table[14];
- } avx_rr2_p5;
- struct {
- XNN_ALIGN(32) float sign_mask[8];
- XNN_ALIGN(32) float magic_bias[8];
- XNN_ALIGN(32) float log2e[8];
- XNN_ALIGN(32) float minus_ln2[8];
- XNN_ALIGN(32) float c5[8];
- XNN_ALIGN(32) float c4[8];
- XNN_ALIGN(32) float c3[8];
- XNN_ALIGN(32) float c2[8];
- XNN_ALIGN(32) float c1[8];
- XNN_ALIGN(32) float one[8];
- XNN_ALIGN(32) float denorm_cutoff[8];
- int32_t mask_table[14];
- } avx2_rr1_p5;
- struct {
- uint32_t sign_mask;
- float magic_bias;
- float log2e;
- float minus_ln2;
- float c3;
- float c2;
- float one;
- XNN_ALIGN(64) float table[16];
- } avx512_rr1_lut16_p3;
- struct {
- uint32_t sign_mask;
- float magic_bias;
- float log2e;
- float minus_ln2_hi;
- float minus_ln2_lo;
- float c2;
- float c1;
- float one;
- XNN_ALIGN(64) float table_lo[16];
- XNN_ALIGN(64) float table_hi[16];
- } avx512_rr2_lut32_p2;
- struct {
- uint32_t sign_mask;
- float log2e;
- float minus_ln2;
- float c5;
- float c4;
- float c3;
- float c2;
- float c1;
- float one;
- } avx512_rr1_p5;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) float minus_log2e[2];
- XNN_ALIGN(8) uint32_t index_mask[2];
- XNN_ALIGN(8) float ln2_hi[2];
- XNN_ALIGN(8) float ln2_lo[2];
- XNN_ALIGN(8) float c2[2];
- XNN_ALIGN(8) float one[2];
- XNN_ALIGN(8) float denorm_cutoff[2];
- } wasmsimd_rr2_lut64_p2;
- struct {
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) float minus_log2e[2];
- XNN_ALIGN(8) float ln2_hi[2];
- XNN_ALIGN(8) float ln2_lo[2];
- XNN_ALIGN(8) float c5[2];
- XNN_ALIGN(8) float c4[2];
- XNN_ALIGN(8) float c3[2];
- XNN_ALIGN(8) float c2[2];
- XNN_ALIGN(8) float c1[2];
- XNN_ALIGN(8) float one[2];
- XNN_ALIGN(8) float denorm_cutoff[2];
- } wasmsimd_rr2_p5;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f16_sqrt_params {
- char _; // Dummy member variable to comply with the C standard
-};
-
-union xnn_f32_sqrt_params {
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- int32_t mask_table[14];
- } avx;
- struct {
- XNN_ALIGN(32) float half[8];
- int32_t mask_table[14];
- } fma;
- struct {
- float half;
- } avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f32_chw_params {
- struct {
- XNN_ALIGN(16) int32_t mask_even[4]; // used by stride 2 kernels
- XNN_ALIGN(16) int32_t mask_odd[4]; // used by stride 2 kernels
- XNN_ALIGN(16) int32_t mask[4]; // used by stride 1 kernels
- float min;
- float max;
- } scalar;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- float min;
- float max;
- XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
- XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels
- XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) float min[4];
- XNN_ALIGN(16) float max[4];
- XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
- XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels
- XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels
- } sse;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f16_chw_params {
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- uint16_t min;
- uint16_t max;
- XNN_ALIGN(8) uint16_t mask_even[4]; // used by stride 2 kernels
- XNN_ALIGN(8) uint16_t mask_odd[4]; // used by stride 2 kernels
- XNN_ALIGN(8) uint16_t mask[4]; // used by stride 1 kernels
- XNN_ALIGN(16) uint16_t maskx8[8]; // used by stride 1 x8 kernels
- } neonfp16arith;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-};
-
-union xnn_s8_minmax_params {  // min/max bounds in one layout per implementation; only "scalar" is unconditional.
- struct {  // "scalar" layout: bounds widened to int32_t
- int32_t min;
- int32_t max;
- } scalar;
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse2" layout: all three fields are unsigned bytes
- XNN_ALIGN(16) uint8_t bias[16];
- XNN_ALIGN(16) uint8_t min_with_bias[16];  // NOTE(review): presumably min with `bias` already applied -- confirm in the init code
- XNN_ALIGN(16) uint8_t max_with_bias[16];
- } sse2;
- struct {  // "sse4" layout: signed bytes, no bias field
- XNN_ALIGN(16) int8_t min[16];
- XNN_ALIGN(16) int8_t max[16];
- } sse4;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "neon" layout: single values rather than arrays
- int8_t min;
- int8_t max;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "wasmsimd" layout: 8-element arrays
- XNN_ALIGN(8) int8_t min[8];
- XNN_ALIGN(8) int8_t max[8];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_u8_minmax_params {  // Unsigned counterpart of the s8 min/max layouts; only "scalar" is unconditional.
- struct {  // "scalar" layout: bounds widened to uint32_t
- uint32_t min;
- uint32_t max;
- } scalar;
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse2" layout: 16-element byte arrays
- XNN_ALIGN(16) uint8_t min[16];
- XNN_ALIGN(16) uint8_t max[16];
- } sse2;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "neon" layout: single values rather than arrays
- uint8_t min;
- uint8_t max;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "wasmsimd" layout: 8-element arrays
- XNN_ALIGN(8) uint8_t min[8];
- XNN_ALIGN(8) uint8_t max[8];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f32_scaleminmax_params {  // scale + min + max in a scalar layout and, on x86 only, an SSE layout.
- struct {  // "scalar" layout (always present)
- float scale;
- float min;
- float max;
- } scalar;
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse" layout: same three fields as 4-element arrays
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) float min[4];
- XNN_ALIGN(16) float max[4];
- } sse;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f32_gavgpool_params {  // mask + multiplier + output bounds; field order differs between layouts.
- struct {  // "scalar" layout: mask first, signed mask elements
- XNN_ALIGN(16) int32_t mask[4];
- float multiplier;
- float output_min;
- float output_max;
- } scalar;
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse" layout: every field is a 4-element array and mask comes last
- XNN_ALIGN(16) float multiplier[4];
- XNN_ALIGN(16) float output_min[4];
- XNN_ALIGN(16) float output_max[4];
- XNN_ALIGN(16) uint32_t mask[4];
- } sse;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "neon" layout: same order as "scalar" but with an unsigned mask
- XNN_ALIGN(16) uint32_t mask[4];
- float multiplier;
- float output_min;
- float output_max;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-};
-
-union xnn_f16_gavgpool_params {
- // Not empty on ARM/ARM64: the neonfp16arith layout below is compiled in. On other targets only the dummy member remains.
- char _; // Dummy member variable to comply with the C standard
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "neonfp16arith" layout
- XNN_ALIGN(16) uint16_t mask[8];
- uint16_t multiplier;  // declared as uint16_t; presumably raw half-precision bit patterns -- confirm
- uint16_t output_min;
- uint16_t output_max;
- } neonfp16arith;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-};
-
-union xnn_f16_hswish_params {  // NOTE(review): no unconditional member here, so this union is empty on targets that are neither ARM nor x86; xnn_f16_chw_params and xnn_f16_gavgpool_params add a `char _` dummy for that case.
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "neon" layout: four uint16_t fields, 8 bytes total
- uint16_t sixth;
- uint16_t three;
- uint16_t six;
- uint16_t pad; // pad to 8 bytes for neonfp16arith assembly.
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "avx" layout: sixth/three are float arrays while six is a uint16_t array
- XNN_ALIGN(32) float sixth[8];
- XNN_ALIGN(32) float three[8];
- XNN_ALIGN(16) uint16_t six[8];  // NOTE(review): element type differs from the two fields above -- confirm this is intentional
- } avx;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_f32_hswish_params {  // Constant sets differ by layout: scalar/wasmsimd hold sixth/three/six, the x86 layouts hold sixth/half/one.
- struct {  // "scalar" layout (always present)
- float sixth;
- float three;
- float six;
- } scalar;
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse" layout: 4-element arrays
- XNN_ALIGN(16) float sixth[4];
- XNN_ALIGN(16) float half[4];
- XNN_ALIGN(16) float one[4];
- } sse;
- struct {  // "avx" layout: 8-element arrays plus a mask table
- XNN_ALIGN(32) float sixth[8];
- XNN_ALIGN(32) float half[8];
- XNN_ALIGN(32) float one[8];
- int32_t mask_table[14];  // presumably a lookup table for partial-vector masks -- confirm against the AVX kernels
- } avx;
- struct {  // "avx512" layout: single floats, not arrays
- float sixth;
- float half;
- float one;
- } avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "wasmsimd" layout: 2-element arrays, same constants as "scalar"
- XNN_ALIGN(8) float sixth[2];
- XNN_ALIGN(8) float three[2];
- XNN_ALIGN(8) float six[2];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qu8_conv_minmax_params {  // One layout per implementation; every layout except fp32_armv6simd carries a kernel_zero_point field (armv6simd has minus_kernel_zero_point).
- struct {  // "fp32_scalar_fmagic" layout
- int32_t kernel_zero_point;
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- } fp32_scalar_fmagic;
- struct {  // "fp32_scalar_imagic" layout: bounds are integers (magic_min/magic_max) instead of floats
- int32_t kernel_zero_point;
- float scale;
- float magic_bias;
- int32_t magic_min;
- int32_t magic_max;
- int32_t magic_bias_less_zero_point;
- } fp32_scalar_imagic;
- struct {  // "fp32_scalar_lrintf" layout: no magic_bias fields
- int32_t kernel_zero_point;
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- int32_t output_zero_point;
- } fp32_scalar_lrintf;
-#if XNN_ARCH_ARM
- struct {  // "fp32_armv6simd" layout: guarded by XNN_ARCH_ARM only (not ARM64)
- float scale;
- float magic_bias;
- uint32_t minus_kernel_zero_point;
- int32_t magic_bias_less_zero_point;
- uint32_t output_min;
- uint32_t output_max;
- } fp32_armv6simd;
-#endif // XNN_ARCH_ARM
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "fp32_neon" layout
- uint8_t kernel_zero_point[4];
- float scale;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } fp32_neon;
- struct {  // "fp32_neonv8" layout: no magic_bias fields
- uint8_t kernel_zero_point[4];
- float scale;
- int16_t output_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } fp32_neonv8;
- struct {  // "rndnu_neon" layout: integer multiplier and shifts instead of a float scale
- uint8_t kernel_zero_point[4];
- int32_t right_pre_shift;
- int32_t multiplier;
- int32_t right_post_shift;
- int16_t output_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } rndnu_neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "fp32_sse2" layout: 16-byte aligned arrays
- XNN_ALIGN(16) int16_t kernel_zero_point[8];
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) float output_max_less_zero_point[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) uint8_t output_min[16];
- } fp32_sse2;
- struct {  // "fp32_avx2" layout: same fields as fp32_sse2 at twice the width
- XNN_ALIGN(32) int16_t kernel_zero_point[16];
- XNN_ALIGN(32) float scale[8];
- XNN_ALIGN(32) float output_max_less_zero_point[8];
- XNN_ALIGN(32) int16_t output_zero_point[16];
- XNN_ALIGN(32) uint8_t output_min[32];
- } fp32_avx2;
- struct {  // "fp32_avx512" layout: same fields as fp32_sse2 at four times the width
- XNN_ALIGN(64) int16_t kernel_zero_point[32];
- XNN_ALIGN(64) float scale[16];
- XNN_ALIGN(64) float output_max_less_zero_point[16];
- XNN_ALIGN(64) int16_t output_zero_point[32];
- XNN_ALIGN(64) uint8_t output_min[64];
- } fp32_avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "fp32_wasmsimd" layout
- XNN_ALIGN(8) int16_t kernel_zero_point[4];
- XNN_ALIGN(8) float scale[2];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) int32_t magic_min[2];
- XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
- XNN_ALIGN(8) int8_t output_max[8];  // NOTE(review): int8_t in a QU8 layout; the QU8 mul and avgpool wasmsimd layouts use uint8_t -- confirm
- } fp32_wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qs8_minmax_params {  // One layout per implementation; none of the layouts has a scale field.
- struct {  // "scalar_imagic" layout
- float magic_bias;
- int32_t magic_min;
- int32_t magic_max;
- int32_t magic_bias_less_zero_point;
- } scalar_imagic;
- struct {  // "scalar_fmagic" layout
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- } scalar_fmagic;
- struct {  // "scalar_lrintf" layout
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- int32_t output_zero_point;
- } scalar_lrintf;
-#if XNN_ARCH_ARM
- struct {  // "armv6simd" layout: guarded by XNN_ARCH_ARM only (not ARM64)
- float magic_bias;
- int32_t magic_bias_less_zero_point;
- uint32_t output_min;
- uint32_t output_max;
- } armv6simd;
-#endif // XNN_ARCH_ARM
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "neon" layout
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- int8_t output_min;
- int8_t output_max;
- } neon;
- struct {  // "neonv8" layout
- int16_t output_zero_point;
- uint8_t output_min;  // NOTE(review): unsigned here, while the "neon" layout above uses int8_t -- confirm
- uint8_t output_max;
- } neonv8;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse2" layout: output_min is held as int16_t
- XNN_ALIGN(16) float output_max_less_zero_point[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int16_t output_min[8];
- } sse2;
- struct {  // "sse4" layout: output_min is held as int8_t
- XNN_ALIGN(16) float output_max_less_zero_point[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int8_t output_min[16];
- } sse4;
- struct {  // "avx2" layout: sse4 fields at twice the width
- XNN_ALIGN(32) float output_max_less_zero_point[8];
- XNN_ALIGN(32) int16_t output_zero_point[16];
- XNN_ALIGN(32) int8_t output_min[32];
- } avx2;
- struct {  // "avx512" layout: sse4 fields at four times the width
- XNN_ALIGN(64) float output_max_less_zero_point[16];
- XNN_ALIGN(64) int16_t output_zero_point[32];
- XNN_ALIGN(64) int8_t output_min[64];
- } avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "wasmsimd" layout
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) int32_t magic_min[2];
- XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
- XNN_ALIGN(8) int8_t output_max[8];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qs8_conv_minmax_params {  // One layout per implementation; unlike the QU8 conv union, no layout has a kernel zero point field.
- struct {  // "fp32_scalar_fmagic" layout
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- } fp32_scalar_fmagic;
- struct {  // "fp32_scalar_imagic" layout
- float scale;
- float magic_bias;
- int32_t magic_min;
- int32_t magic_max;
- int32_t magic_bias_less_zero_point;
- } fp32_scalar_imagic;
- struct {  // "fp32_scalar_lrintf" layout
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- int32_t output_zero_point;
- } fp32_scalar_lrintf;
-#if XNN_ARCH_ARM
- struct {  // "fp32_armv6simd" layout: guarded by XNN_ARCH_ARM only (not ARM64)
- float scale;
- float magic_bias;
- int32_t magic_bias_less_zero_point;
- uint32_t output_min;
- uint32_t output_max;
- } fp32_armv6simd;
-#endif // XNN_ARCH_ARM
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "fp32_neon" layout
- float scale;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- int8_t output_min;
- int8_t output_max;
- } fp32_neon;
- struct {  // "fp32_neonv8" layout
- float scale;
- int16_t output_zero_point;
- int8_t output_min;
- int8_t output_max;
- } fp32_neonv8;
- struct {  // "rndnu_neon" layout: integer multiplier and shifts instead of a float scale
- int32_t right_pre_shift;
- int32_t multiplier;
- int32_t right_post_shift;
- int16_t output_zero_point;
- int8_t output_min;
- int8_t output_max;
- } rndnu_neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "fp32_sse2" layout: output_min is held as int16_t
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) float output_max_less_zero_point[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int16_t output_min[8];
- } fp32_sse2;
- struct {  // "fp32_sse4" layout: output_min is held as int8_t
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) float output_max_less_zero_point[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int8_t output_min[16];
- } fp32_sse4;
- struct {  // "fp32_avx2" layout: fp32_sse4 fields at twice the width
- XNN_ALIGN(32) float scale[8];
- XNN_ALIGN(32) float output_max_less_zero_point[8];
- XNN_ALIGN(32) int16_t output_zero_point[16];
- XNN_ALIGN(32) int8_t output_min[32];
- } fp32_avx2;
- struct {  // "fp32_avx512" layout: fp32_sse4 fields at four times the width
- XNN_ALIGN(64) float scale[16];
- XNN_ALIGN(64) float output_max_less_zero_point[16];
- XNN_ALIGN(64) int16_t output_zero_point[32];
- XNN_ALIGN(64) int8_t output_min[64];
- } fp32_avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "fp32_wasmsimd" layout
- XNN_ALIGN(8) float scale[2];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) int32_t magic_min[2];
- XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
- XNN_ALIGN(8) int8_t output_max[8];
- } fp32_wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qu8_addsub_minmax_params {  // One layout per implementation; all hold two multipliers (a/b), a shift and output bounds.
- struct {  // "scalar" layout: has a `rounding` field that the QS8 scalar layout lacks
- int32_t bias;
- int32_t a_multiplier;
- int32_t b_multiplier;
- int32_t rounding;
- uint32_t shift;
- int32_t output_min_less_zero_point;
- int32_t output_max_less_zero_point;
- int32_t output_zero_point;
- } scalar;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "neon" layout: keeps separate a/b zero points instead of a combined bias
- uint8_t a_zero_point;
- uint8_t b_zero_point;
- int16_t output_zero_point;
- int32_t a_multiplier;
- int32_t b_multiplier;
- int32_t right_shift;
- uint8_t output_min;
- uint8_t output_max;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse2" layout: multipliers are split into 16-bit lo/hi arrays
- XNN_ALIGN(16) int32_t bias[4];
- XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
- XNN_ALIGN(16) uint16_t a_multiplier_hi[8];
- XNN_ALIGN(16) uint16_t b_multiplier_lo[8];
- XNN_ALIGN(16) uint16_t b_multiplier_hi[8];
- uint32_t shift;
- uint32_t b_multiplier;  // unsplit copy of b's multiplier alongside the lo/hi arrays; purpose not visible here
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) uint8_t output_min[16];
- XNN_ALIGN(16) uint8_t output_max[16];
- } sse2;
- struct {  // "sse4" layout: 32-bit multipliers, 64-bit shift elements
- XNN_ALIGN(16) int32_t bias[4];
- XNN_ALIGN(16) int32_t a_multiplier[4];
- XNN_ALIGN(16) int32_t b_multiplier[4];
- XNN_ALIGN(16) uint64_t shift[2];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) uint8_t output_min[16];
- XNN_ALIGN(16) uint8_t output_max[16];
- } sse4;
- struct {  // "avx2" layout: 32-byte arrays except output_min/max, which stay at 16 bytes
- XNN_ALIGN(32) int32_t bias[8];
- XNN_ALIGN(32) int32_t a_multiplier[8];
- XNN_ALIGN(32) int32_t b_multiplier[8];
- XNN_ALIGN(32) uint64_t shift[4];
- XNN_ALIGN(32) int16_t output_zero_point[16];
- XNN_ALIGN(16) uint8_t output_min[16];
- XNN_ALIGN(16) uint8_t output_max[16];
- } avx2;
- struct {  // "avx512" layout: 64-byte arrays except output_min/max, which are 32 bytes
- XNN_ALIGN(64) int32_t bias[16];
- XNN_ALIGN(64) int32_t a_multiplier[16];
- XNN_ALIGN(64) int32_t b_multiplier[16];
- XNN_ALIGN(64) uint64_t shift[8];
- XNN_ALIGN(64) int16_t output_zero_point[32];
- XNN_ALIGN(32) uint8_t output_min[32];
- XNN_ALIGN(32) uint8_t output_max[32];
- } avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "wasmsimd" layout: shift is a single value, everything else an 8-byte array
- XNN_ALIGN(8) int32_t bias[2];
- XNN_ALIGN(8) int32_t a_multiplier[2];
- XNN_ALIGN(8) int32_t b_multiplier[2];
- uint32_t shift;
- XNN_ALIGN(8) int16_t output_zero_point[4];
- XNN_ALIGN(8) uint8_t output_min[8];
- XNN_ALIGN(8) uint8_t output_max[8];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qs8_addsub_minmax_params {  // Signed counterpart of the QU8 add/sub layouts, with two SSE4 layouts (mul16 and mul32).
- struct {  // "scalar" layout: no `rounding` field, unlike the QU8 scalar layout
- int32_t bias;
- int32_t a_multiplier;
- int32_t b_multiplier;
- uint32_t shift;
- int32_t output_min_less_zero_point;
- int32_t output_max_less_zero_point;
- int32_t output_zero_point;
- } scalar;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "neon" layout: keeps separate a/b zero points instead of a combined bias
- int8_t a_zero_point;
- int8_t b_zero_point;
- int16_t output_zero_point;
- int32_t a_multiplier;
- int32_t b_multiplier;
- int32_t right_shift;
- int8_t output_min;
- int8_t output_max;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse2" layout: 16-bit lo/hi multiplier halves; output bounds held as int16_t
- XNN_ALIGN(16) int32_t bias[4];
- XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
- XNN_ALIGN(16) uint16_t a_multiplier_hi[8];
- XNN_ALIGN(16) uint16_t b_multiplier_lo[8];
- XNN_ALIGN(16) uint16_t b_multiplier_hi[8];
- uint32_t shift;
- uint32_t b_multiplier;
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int16_t output_min[8];
- XNN_ALIGN(16) int16_t output_max[8];
- } sse2;
- struct {  // "sse4_mul16" layout: same as sse2 except output bounds are int8_t[16]
- XNN_ALIGN(16) int32_t bias[4];
- XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
- XNN_ALIGN(16) uint16_t a_multiplier_hi[8];
- XNN_ALIGN(16) uint16_t b_multiplier_lo[8];
- XNN_ALIGN(16) uint16_t b_multiplier_hi[8];
- uint32_t shift;
- uint32_t b_multiplier;
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int8_t output_min[16];
- XNN_ALIGN(16) int8_t output_max[16];
- } sse4_mul16;
- struct {  // "sse4_mul32" layout: 32-bit multipliers, 64-bit shift elements
- XNN_ALIGN(16) int32_t bias[4];
- XNN_ALIGN(16) int32_t a_multiplier[4];
- XNN_ALIGN(16) int32_t b_multiplier[4];
- XNN_ALIGN(16) uint64_t shift[2];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int8_t output_min[16];
- XNN_ALIGN(16) int8_t output_max[16];
- } sse4_mul32;
- struct {  // "avx2" layout: 32-byte arrays except output_min/max, which stay at 16 bytes
- XNN_ALIGN(32) int32_t bias[8];
- XNN_ALIGN(32) int32_t a_multiplier[8];
- XNN_ALIGN(32) int32_t b_multiplier[8];
- XNN_ALIGN(32) uint64_t shift[4];
- XNN_ALIGN(32) int16_t output_zero_point[16];
- XNN_ALIGN(16) int8_t output_min[16];
- XNN_ALIGN(16) int8_t output_max[16];
- } avx2;
- struct {  // "avx512" layout: 64-byte arrays except output_min/max, which are 32 bytes
- XNN_ALIGN(64) int32_t bias[16];
- XNN_ALIGN(64) int32_t a_multiplier[16];
- XNN_ALIGN(64) int32_t b_multiplier[16];
- XNN_ALIGN(64) uint64_t shift[8];
- XNN_ALIGN(64) int16_t output_zero_point[32];
- XNN_ALIGN(32) int8_t output_min[32];
- XNN_ALIGN(32) int8_t output_max[32];
- } avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "wasmsimd" layout: shift is a single value, everything else an 8-byte array
- XNN_ALIGN(8) int32_t bias[2];
- XNN_ALIGN(8) int32_t a_multiplier[2];
- XNN_ALIGN(8) int32_t b_multiplier[2];
- uint32_t shift;
- XNN_ALIGN(8) int16_t output_zero_point[4];
- XNN_ALIGN(8) int8_t output_min[8];
- XNN_ALIGN(8) int8_t output_max[8];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qu8_mul_minmax_params {  // One layout per implementation; every layout carries both input zero points (a and b).
- struct {  // "fp32_scalar" layout (always present)
- int32_t a_zero_point;
- int32_t b_zero_point;
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- } fp32_scalar;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "fp32_neon" layout: zero points stored as 2-element byte arrays
- uint8_t a_zero_point[2];
- uint8_t b_zero_point[2];
- float scale;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } fp32_neon;
- struct {  // "fp32_neonv8" layout: no magic_bias fields
- uint8_t a_zero_point[2];
- uint8_t b_zero_point[2];
- float scale;
- int16_t output_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } fp32_neonv8;
- struct {  // "rndnu_neon" layout: shift fields are named left_* here, while the conv unions name them right_*
- uint8_t a_zero_point[2];
- uint8_t b_zero_point[2];
- int32_t left_pre_shift;
- int32_t multiplier;
- int32_t left_post_shift;
- int16_t output_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } rndnu_neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "fp32_sse2" layout: zero points widened to int16_t
- XNN_ALIGN(16) int16_t a_zero_point[8];
- XNN_ALIGN(16) int16_t b_zero_point[8];
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) uint8_t output_min[16];
- XNN_ALIGN(16) uint8_t output_max[16];
- } fp32_sse2;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "fp32_wasmsimd" layout
- XNN_ALIGN(8) int16_t a_zero_point[4];
- XNN_ALIGN(8) int16_t b_zero_point[4];
- XNN_ALIGN(8) float scale[2];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) int32_t magic_min[2];
- XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
- XNN_ALIGN(8) uint8_t output_max[8];
- } fp32_wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qs8_mul_minmax_params {  // Signed counterpart of the QU8 multiply layouts, plus an extra fp32_sse4 layout.
- struct {  // "fp32_scalar" layout (always present)
- int32_t a_zero_point;
- int32_t b_zero_point;
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- } fp32_scalar;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "fp32_neon" layout: zero points stored as 2-element byte arrays
- int8_t a_zero_point[2];
- int8_t b_zero_point[2];
- float scale;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- int8_t output_min;
- int8_t output_max;
- } fp32_neon;
- struct {  // "fp32_neonv8" layout: no magic_bias fields
- int8_t a_zero_point[2];
- int8_t b_zero_point[2];
- float scale;
- int16_t output_zero_point;
- int8_t output_min;
- int8_t output_max;
- } fp32_neonv8;
- struct {  // "rndnu_neon" layout: shift fields are named left_* here, while the conv unions name them right_*
- int8_t a_zero_point[2];
- int8_t b_zero_point[2];
- int32_t left_pre_shift;
- int32_t multiplier;
- int32_t left_post_shift;
- int16_t output_zero_point;
- int8_t output_min;
- int8_t output_max;
- } rndnu_neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "fp32_sse2" layout: output bounds held as int16_t
- XNN_ALIGN(16) int16_t a_zero_point[8];
- XNN_ALIGN(16) int16_t b_zero_point[8];
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int16_t output_min[8];
- XNN_ALIGN(16) int16_t output_max[8];
- } fp32_sse2;
- struct {  // "fp32_sse4" layout: same as fp32_sse2 except output bounds are int8_t[16]
- XNN_ALIGN(16) int16_t a_zero_point[8];
- XNN_ALIGN(16) int16_t b_zero_point[8];
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int8_t output_min[16];
- XNN_ALIGN(16) int8_t output_max[16];
- } fp32_sse4;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "fp32_wasmsimd" layout
- XNN_ALIGN(8) int16_t a_zero_point[4];
- XNN_ALIGN(8) int16_t b_zero_point[4];
- XNN_ALIGN(8) float scale[2];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) int32_t magic_min[2];
- XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
- XNN_ALIGN(8) int8_t output_max[8];
- } fp32_wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qs8_lrelu_params {  // Layouts come in two shapes: positive/negative multipliers, or multiplier diff/base.
- struct {  // "scalar_select" layout
- int32_t input_zero_point;
- int32_t positive_multiplier;
- int32_t negative_multiplier;
- int32_t bias;
- } scalar_select;
- struct {  // "scalar_andxor" layout
- int32_t input_zero_point;
- int32_t multiplier_diff;  // NOTE(review): diff precedes base here; xnn_qu8_lrelu_params.scalar_andxor declares base first -- confirm the init code matches each order
- int32_t multiplier_base;
- int32_t bias;
- } scalar_andxor;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "armv6simd" layout: compiled for ARM64 too under this guard
- uint32_t input_zero_point;
- uint32_t positive_multiplier;
- uint32_t negative_multiplier;
- int32_t bias;
- } armv6simd;
- struct {  // "neon" layout: four int16_t values, output_zero_point instead of bias
- int16_t input_zero_point;
- int16_t positive_multiplier;
- int16_t negative_multiplier;
- int16_t output_zero_point;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse2" layout: diff/base form
- XNN_ALIGN(16) int16_t input_zero_point[8];
- XNN_ALIGN(16) int16_t multiplier_diff[8];
- XNN_ALIGN(16) int16_t multiplier_base[8];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- } sse2;
- struct {  // "avx" layout: positive/negative form, 16-byte arrays
- XNN_ALIGN(16) int16_t input_zero_point[8];
- XNN_ALIGN(16) int16_t positive_multiplier[8];
- XNN_ALIGN(16) int16_t negative_multiplier[8];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- } avx;
- struct {  // "avx2" layout: positive/negative form, 32-byte arrays
- XNN_ALIGN(32) int16_t input_zero_point[16];
- XNN_ALIGN(32) int16_t positive_multiplier[16];
- XNN_ALIGN(32) int16_t negative_multiplier[16];
- XNN_ALIGN(32) int16_t output_zero_point[16];
- } avx2;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "wasmsimd_arm" layout: positive/negative form
- XNN_ALIGN(8) int16_t input_zero_point[4];
- XNN_ALIGN(8) int16_t positive_multiplier[4];
- XNN_ALIGN(8) int16_t negative_multiplier[4];
- XNN_ALIGN(8) int16_t output_zero_point[4];
- } wasmsimd_arm;
- struct {  // "wasmsimd_x86" layout: diff/base form
- XNN_ALIGN(8) int16_t input_zero_point[4];
- XNN_ALIGN(8) int16_t multiplier_diff[4];
- XNN_ALIGN(8) int16_t multiplier_base[4];
- XNN_ALIGN(8) int16_t output_zero_point[4];
- } wasmsimd_x86;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qu8_lrelu_params {  // Layouts come in two shapes: positive/negative multipliers, or multiplier diff/base.
- struct {  // "scalar_select" layout
- int32_t input_zero_point;
- int32_t positive_multiplier;
- int32_t negative_multiplier;
- int32_t bias;
- } scalar_select;
- struct {  // "scalar_andxor" layout
- int32_t input_zero_point;
- int32_t multiplier_base;  // NOTE(review): base precedes diff here; xnn_qs8_lrelu_params.scalar_andxor declares diff first -- confirm the init code matches each order
- int32_t multiplier_diff;
- int32_t bias;
- } scalar_andxor;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "armv6simd" layout: compiled for ARM64 too under this guard
- uint32_t input_zero_point;
- uint32_t positive_multiplier;
- uint32_t negative_multiplier;
- int32_t bias;
- } armv6simd;
- struct {  // "neon" layout: input_zero_point is unsigned, the other three fields are signed
- uint16_t input_zero_point;
- int16_t positive_multiplier;
- int16_t negative_multiplier;
- int16_t output_zero_point;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse2" layout: diff/base form
- XNN_ALIGN(16) int16_t input_zero_point[8];
- XNN_ALIGN(16) int16_t multiplier_diff[8];
- XNN_ALIGN(16) int16_t multiplier_base[8];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- } sse2;
- struct {  // "avx" layout: positive/negative form, 16-byte arrays
- XNN_ALIGN(16) int16_t input_zero_point[8];
- XNN_ALIGN(16) int16_t positive_multiplier[8];
- XNN_ALIGN(16) int16_t negative_multiplier[8];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- } avx;
- struct {  // "avx2" layout: positive/negative form, 32-byte arrays
- XNN_ALIGN(32) int16_t input_zero_point[16];
- XNN_ALIGN(32) int16_t positive_multiplier[16];
- XNN_ALIGN(32) int16_t negative_multiplier[16];
- XNN_ALIGN(32) int16_t output_zero_point[16];
- } avx2;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "wasmsimd_arm" layout: positive/negative form
- XNN_ALIGN(8) int16_t input_zero_point[4];
- XNN_ALIGN(8) int16_t positive_multiplier[4];
- XNN_ALIGN(8) int16_t negative_multiplier[4];
- XNN_ALIGN(8) int16_t output_zero_point[4];
- } wasmsimd_arm;
- struct {  // "wasmsimd_x86" layout: diff/base form
- XNN_ALIGN(8) int16_t input_zero_point[4];
- XNN_ALIGN(8) int16_t multiplier_diff[4];
- XNN_ALIGN(8) int16_t multiplier_base[4];
- XNN_ALIGN(8) int16_t output_zero_point[4];
- } wasmsimd_x86;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qu8_avgpool_minmax_params {  // Two groups of layouts: the fp32_*/rndnu_* ones first, then the legacy scalar/neon/sse2 ones.
- struct {  // "fp32_scalar_fmagic" layout
- int32_t init_bias;
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- } fp32_scalar_fmagic;
- struct {  // "fp32_scalar_imagic" layout
- int32_t init_bias;
- float scale;
- float magic_bias;
- int32_t magic_min;
- int32_t magic_max;
- int32_t magic_bias_less_zero_point;
- } fp32_scalar_imagic;
- struct {  // "fp32_scalar_lrintf" layout
- int32_t init_bias;
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- int32_t output_zero_point;
- } fp32_scalar_lrintf;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "fp32_neon" layout
- int32_t init_bias;
- float scale;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } fp32_neon;
- struct {  // "fp32_neonv8" layout
- int32_t init_bias;
- float scale;
- int16_t output_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } fp32_neonv8;
- struct {  // "rndnu_neon" layout: integer multiplier and left_* shifts instead of a float scale
- int32_t init_bias;
- int32_t left_pre_shift;
- int32_t multiplier;
- int32_t left_post_shift;
- int16_t output_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } rndnu_neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "fp32_sse2" layout
- XNN_ALIGN(16) int32_t init_bias[4];
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) float output_max_less_zero_point[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) uint8_t output_min[16];
- } fp32_sse2;
- struct {  // "fp32_sse4" layout: field-for-field identical to fp32_sse2 in this union
- XNN_ALIGN(16) int32_t init_bias[4];
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) float output_max_less_zero_point[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) uint8_t output_min[16];
- } fp32_sse4;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "fp32_wasmsimd" layout
- XNN_ALIGN(8) int32_t init_bias[2];
- XNN_ALIGN(8) float scale[2];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) int32_t magic_min[2];
- XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
- XNN_ALIGN(8) uint8_t output_max[8];
- } fp32_wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-
- // Legacy parameters used by QU8 AVGPOOL microkernels
- struct {  // legacy "scalar" layout: integer multiplier, 64-bit rounding term and right shift
- int32_t bias;
- int32_t multiplier;
- int64_t rounding;
- uint32_t right_shift;
- int32_t output_min_less_zero_point;
- int32_t output_max_less_zero_point;
- int32_t output_zero_point;
- } scalar;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // legacy "neon" layout: a 64-bit left_shift where the scalar layout has right_shift
- int32_t bias;
- int32_t multiplier;
- int64_t left_shift;
- int16_t output_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // legacy "sse2" layout: multiplier is unsigned here, signed in the scalar and neon layouts
- XNN_ALIGN(16) int32_t bias[4];
- XNN_ALIGN(16) uint32_t multiplier[4];
- XNN_ALIGN(16) uint64_t rounding[2];
- XNN_ALIGN(16) uint64_t right_shift[2];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) uint8_t output_min[16];
- XNN_ALIGN(16) uint8_t output_max[16];
- } sse2;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-};
-
-union xnn_qs8_avgpool_minmax_params {  // Signed counterpart of the QU8 avgpool layouts, without the legacy group.
- struct {  // "fp32_scalar_fmagic" layout
- int32_t init_bias;
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- } fp32_scalar_fmagic;
- struct {  // "fp32_scalar_imagic" layout
- int32_t init_bias;
- float scale;
- float magic_bias;
- int32_t magic_min;
- int32_t magic_max;
- int32_t magic_bias_less_zero_point;
- } fp32_scalar_imagic;
- struct {  // "fp32_scalar_lrintf" layout
- int32_t init_bias;
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- int32_t output_zero_point;
- } fp32_scalar_lrintf;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "fp32_neon" layout
- int32_t init_bias;
- float scale;
- float magic_bias;
- int32_t magic_bias_less_output_zero_point;
- int8_t output_min;
- int8_t output_max;
- } fp32_neon;
- struct {  // "fp32_neonv8" layout
- int32_t init_bias;
- float scale;
- int16_t output_zero_point;
- int8_t output_min;
- int8_t output_max;
- } fp32_neonv8;
- struct {  // "rndnu_neon" layout: integer multiplier and left_* shifts instead of a float scale
- int32_t init_bias;
- int32_t left_pre_shift;
- int32_t multiplier;
- int32_t left_post_shift;
- int16_t output_zero_point;
- int8_t output_min;
- int8_t output_max;
- } rndnu_neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "fp32_sse2" layout: output_min is held as int16_t
- XNN_ALIGN(16) int32_t init_bias[4];
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) float output_max_less_zero_point[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int16_t output_min[8];
- } fp32_sse2;
- struct {  // "fp32_sse4" layout: output_min is held as int8_t
- XNN_ALIGN(16) int32_t init_bias[4];
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) float output_max_less_zero_point[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int8_t output_min[16];
- } fp32_sse4;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "fp32_wasmsimd" layout
- XNN_ALIGN(8) int32_t init_bias[2];
- XNN_ALIGN(8) float scale[2];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) int32_t magic_min[2];
- XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
- XNN_ALIGN(8) int8_t output_max[8];
- } fp32_wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f16_f32_cvt_params {  // One layout per implementation; the *_int16 and *_int32 layouts differ in element width.
- struct {  // "scalar" layout (always present)
- uint32_t sign_mask;
- uint32_t exp_offset;
- float exp_scale;
- uint32_t magic_mask;
- float magic_bias;
- uint32_t denorm_cutoff;
- } scalar;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "neon" layout: only exp_scale is stored
- float exp_scale;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse_int16" layout: masks/offsets as 16-bit elements, float exp_scale and magic_bias
- XNN_ALIGN(16) uint16_t sign_mask[8];
- XNN_ALIGN(16) uint16_t exp_offset[8];
- XNN_ALIGN(16) float exp_scale[4];
- XNN_ALIGN(16) uint16_t magic_mask[8];
- XNN_ALIGN(16) float magic_bias[4];
- XNN_ALIGN(16) int16_t denorm_cutoff[8];
- } sse_int16;
- struct {  // "sse_int32" layout: no magic_mask field, and magic_bias is uint32_t rather than float
- XNN_ALIGN(16) uint32_t sign_mask[4];
- XNN_ALIGN(16) uint32_t exp_offset[4];
- XNN_ALIGN(16) float exp_scale[4];
- XNN_ALIGN(16) uint32_t magic_bias[4];
- XNN_ALIGN(16) int32_t denorm_cutoff[4];
- } sse_int32;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "wasmsimd_int16" layout: same fields as sse_int16 at half the array length
- XNN_ALIGN(8) uint16_t sign_mask[4];
- XNN_ALIGN(8) uint16_t exp_offset[4];
- XNN_ALIGN(8) float exp_scale[2];
- XNN_ALIGN(8) uint16_t magic_mask[4];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) int16_t denorm_cutoff[4];
- } wasmsimd_int16;
- struct {  // "wasmsimd_int32" layout: same fields as sse_int32 at half the array length
- XNN_ALIGN(8) uint32_t sign_mask[2];
- XNN_ALIGN(8) uint32_t exp_offset[2];
- XNN_ALIGN(8) float exp_scale[2];
- XNN_ALIGN(8) uint32_t magic_bias[2];
- XNN_ALIGN(8) int32_t denorm_cutoff[2];
- } wasmsimd_int32;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f32_f16_cvt_params {  // One layout per implementation; the two scalar layouts hold similar fields in different orders.
- struct {  // "scalar_bitcast" layout: includes nonsign_mask
- uint32_t nonsign_mask;
- uint32_t exp_bias;
- float scale_to_inf;
- uint32_t expw_max;
- float scale_to_zero;
- uint32_t bias_min;
- uint16_t exph_mask;
- uint16_t manth_mask;
- uint16_t nanh;
- } scalar_bitcast;
- struct {  // "scalar_fabsf" layout: no nonsign_mask, and scale_to_inf comes first
- float scale_to_inf;
- uint32_t exp_bias;
- float scale_to_zero;
- uint32_t expw_max;
- uint32_t bias_min;
- uint16_t exph_mask;
- uint16_t manth_mask;
- uint16_t nanh;
- } scalar_fabsf;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "neon" layout: only four of the fields are stored
- uint32_t exp_bias;
- float scale_to_inf;
- uint32_t expw_max;
- float scale_to_zero;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse2" layout: manth_mask precedes exph_mask and both are 32-bit, unlike the scalar layouts
- XNN_ALIGN(16) uint32_t nonsign_mask[4];
- XNN_ALIGN(16) uint32_t exp_bias[4];
- XNN_ALIGN(16) float scale_to_inf[4];
- XNN_ALIGN(16) uint32_t expw_max[4];
- XNN_ALIGN(16) float scale_to_zero[4];
- XNN_ALIGN(16) int16_t bias_min[8];
- XNN_ALIGN(16) uint32_t manth_mask[4];
- XNN_ALIGN(16) uint32_t exph_mask[4];
- XNN_ALIGN(16) uint16_t nanh[8];
- } sse2;
- struct {  // "f16c" layout: holds only a mask table
- int32_t mask_table[14];  // presumably a lookup table for partial-vector masks -- confirm against the F16C kernels
- } f16c;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "wasmsimd" layout: the sse2 fields minus nonsign_mask, at half the array length
- XNN_ALIGN(8) uint32_t exp_bias[2];
- XNN_ALIGN(8) float scale_to_inf[2];
- XNN_ALIGN(8) uint32_t expw_max[2];
- XNN_ALIGN(8) float scale_to_zero[2];
- XNN_ALIGN(8) int16_t bias_min[4];
- XNN_ALIGN(8) uint32_t manth_mask[2];
- XNN_ALIGN(8) uint32_t exph_mask[2];
- XNN_ALIGN(8) uint16_t nanh[4];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f32_qs8_cvt_params {  // One layout per implementation; every layout starts with a scale field.
- struct {  // "scalar_fmagic" layout
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- float magic_bias;
- int32_t magic_bias_less_zero_point;
- } scalar_fmagic;
- struct {  // "scalar_imagic" layout
- float scale;
- float magic_bias;
- int32_t magic_min;
- int32_t magic_max;
- int32_t magic_bias_less_zero_point;
- } scalar_imagic;
- struct {  // "scalar_lrintf" layout
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- int32_t output_zero_point;
- } scalar_lrintf;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {  // "neon" layout
- float scale;
- float magic_bias;
- int32_t magic_bias_less_zero_point;
- int8_t output_min;
- int8_t output_max;
- } neon;
- struct {  // "neonv8" layout: no magic_bias fields
- float scale;
- int16_t output_zero_point;
- int8_t output_min;
- int8_t output_max;
- } neonv8;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {  // "sse2" layout: output_min is held as int16_t
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) float output_max_less_zero_point[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int8_t output_min[16];
- } sse4;
- struct {  // "avx" layout: 32-byte float arrays, but zero point and output_min stay at 16 bytes
- XNN_ALIGN(32) float scale[8];
- XNN_ALIGN(32) float output_max_less_zero_point[8];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) int8_t output_min[16];
- int32_t mask_table[14];  // presumably a lookup table for partial-vector masks -- confirm against the AVX kernels
- } avx;
- struct {  // "avx2" layout: all arrays 32-byte aligned, plus a shuffle mask
- XNN_ALIGN(32) float scale[8];
- XNN_ALIGN(32) float output_max_less_zero_point[8];
- XNN_ALIGN(32) int16_t output_zero_point[16];
- XNN_ALIGN(32) uint32_t shuffle_mask[8];
- XNN_ALIGN(32) int8_t output_min[32];
- int32_t mask_table[14];
- } avx2;
- struct {  // "avx512" layout: two shuffle masks (512-bit and 256-bit) and no mask_table
- XNN_ALIGN(64) float scale[16];
- XNN_ALIGN(64) float output_max_less_zero_point[16];
- XNN_ALIGN(64) int16_t output_zero_point[32];
- XNN_ALIGN(64) int8_t output_min[64];
- XNN_ALIGN(64) uint32_t shuffle512_mask[16];
- XNN_ALIGN(32) uint32_t shuffle256_mask[8];
- } avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {  // "wasmsimd_cvt" layout: zero point plus byte-valued min/max
- XNN_ALIGN(8) float scale[2];
- XNN_ALIGN(8) int16_t output_zero_point[4];
- XNN_ALIGN(8) int8_t output_min[8];
- XNN_ALIGN(8) int8_t output_max[8];
- } wasmsimd_cvt;
- struct {  // "wasmsimd_magic" layout: magic_bias form, only output_max stored as bytes
- XNN_ALIGN(8) float scale[2];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) int32_t magic_min[2];
- XNN_ALIGN(8) int32_t magic_bias_less_zero_point[2];
- XNN_ALIGN(8) int8_t output_max[8];
- } wasmsimd_magic;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_f32_qu8_cvt_params {
- struct {
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- float magic_bias;
- int32_t magic_bias_less_zero_point;
- } scalar_fmagic;
- struct {
- float scale;
- float magic_bias;
- int32_t magic_min;
- int32_t magic_max;
- int32_t magic_bias_less_zero_point;
- } scalar_imagic;
- struct {
- float scale;
- float output_min_less_zero_point;
- float output_max_less_zero_point;
- int32_t output_zero_point;
- } scalar_lrintf;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- float scale;
- float magic_bias;
- int32_t magic_bias_less_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } neon;
- struct {
- float scale;
- int16_t output_zero_point;
- uint8_t output_min;
- uint8_t output_max;
- } neonv8;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) float scale[4];
- XNN_ALIGN(16) float output_max_less_zero_point[4];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) uint8_t output_min[16];
- } sse2;
- struct {
- XNN_ALIGN(32) float scale[8];
- XNN_ALIGN(32) float output_max_less_zero_point[8];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- XNN_ALIGN(16) uint8_t output_min[16];
- int32_t mask_table[14];
- } avx;
- struct {
- XNN_ALIGN(32) float scale[8];
- XNN_ALIGN(32) float output_max_less_zero_point[8];
- XNN_ALIGN(32) int16_t output_zero_point[16];
- XNN_ALIGN(32) uint32_t shuffle_mask[8];
- XNN_ALIGN(32) uint8_t output_min[32];
- int32_t mask_table[14];
- } avx2;
- struct {
- XNN_ALIGN(64) float scale[16];
- XNN_ALIGN(64) float output_max_less_zero_point[16];
- XNN_ALIGN(64) int16_t output_zero_point[32];
- XNN_ALIGN(64) uint8_t output_min[64];
- XNN_ALIGN(64) uint32_t shuffle512_mask[16];
- XNN_ALIGN(32) uint32_t shuffle256_mask[8];
- } avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) float scale[2];
- XNN_ALIGN(8) int16_t output_zero_point[4];
- XNN_ALIGN(8) uint8_t output_min[8];
- XNN_ALIGN(8) uint8_t output_max[8];
- } wasmsimd_cvt;
- struct {
- XNN_ALIGN(8) float scale[2];
- XNN_ALIGN(8) float magic_bias[2];
- XNN_ALIGN(8) int32_t magic_min[2];
- XNN_ALIGN(8) int32_t magic_bias_less_zero_point[2];
- XNN_ALIGN(8) uint8_t output_max[8];
- } wasmsimd_magic;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qs8_cvt_params {
- struct {
- int32_t bias;
- int32_t multiplier;
- } scalar;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- uint32_t minus_input_zero_point;
- int32_t multiplier;
- int32_t bias;
- } armv6simd;
- struct {
- int16_t input_zero_point;
- int16_t multiplier;
- int16_t output_zero_point;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) int16_t multiplier[8];
- XNN_ALIGN(16) int32_t bias[4];
- } sse2;
- struct {
- XNN_ALIGN(16) int16_t input_zero_point[8];
- XNN_ALIGN(16) int16_t multiplier[8];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- } ssse3;
- struct {
- XNN_ALIGN(32) int16_t input_zero_point[16];
- XNN_ALIGN(32) int16_t multiplier[16];
- XNN_ALIGN(32) int16_t output_zero_point[16];
- } avx2;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) int16_t input_zero_point[4];
- XNN_ALIGN(8) int16_t multiplier[4];
- XNN_ALIGN(8) int16_t output_zero_point[4];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qs8_f32_cvt_params {
- struct {
- int32_t zero_point;
- float scale;
- } scalar;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- int16_t minus_zero_point[2];
- float scale;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) uint8_t sign_mask[16];
- XNN_ALIGN(16) uint16_t magic_exp[8];
- XNN_ALIGN(16) float magic_bias[4];
- XNN_ALIGN(16) float scale[4];
- } sse2;
- struct {
- XNN_ALIGN(16) int32_t minus_zero_point[4];
- XNN_ALIGN(16) float scale[4];
- } sse4;
- struct {
- XNN_ALIGN(32) int32_t minus_zero_point[8];
- XNN_ALIGN(32) float scale[8];
- } avx;
- struct {
- XNN_ALIGN(64) int32_t minus_zero_point[16];
- XNN_ALIGN(64) float scale[16];
- } avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) int16_t minus_zero_point[4];
- XNN_ALIGN(8) float scale[2];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qu8_cvt_params {
- struct {
- int32_t bias;
- int32_t multiplier;
- } scalar;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- uint32_t minus_input_zero_point;
- int32_t multiplier;
- int32_t bias;
- } armv6simd;
- struct {
- uint16_t input_zero_point;
- int16_t multiplier;
- int16_t output_zero_point;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) uint16_t multiplier[8];
- XNN_ALIGN(16) int32_t bias[4];
- } sse2;
- struct {
- XNN_ALIGN(16) uint16_t input_zero_point[8];
- XNN_ALIGN(16) int16_t multiplier[8];
- XNN_ALIGN(16) int16_t output_zero_point[8];
- } ssse3;
- struct {
- XNN_ALIGN(32) uint16_t input_zero_point[16];
- XNN_ALIGN(32) int16_t multiplier[16];
- XNN_ALIGN(32) int16_t output_zero_point[16];
- } avx2;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) uint16_t input_zero_point[4];
- XNN_ALIGN(8) int16_t multiplier[4];
- XNN_ALIGN(8) int16_t output_zero_point[4];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
-union xnn_qu8_f32_cvt_params {
- struct {
- int32_t zero_point;
- float scale;
- } scalar;
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- struct {
- int16_t minus_zero_point[2];
- float scale;
- } neon;
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- struct {
- XNN_ALIGN(16) uint16_t magic_exp[8];
- XNN_ALIGN(16) float magic_bias[4];
- XNN_ALIGN(16) float scale[4];
- } sse2;
- struct {
- XNN_ALIGN(16) int32_t minus_zero_point[4];
- XNN_ALIGN(16) float scale[4];
- } sse4;
- struct {
- XNN_ALIGN(32) int32_t minus_zero_point[8];
- XNN_ALIGN(32) float scale[8];
- } avx;
- struct {
- XNN_ALIGN(64) int32_t minus_zero_point[16];
- XNN_ALIGN(64) float scale[16];
- } avx512;
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
- struct {
- XNN_ALIGN(8) int16_t minus_zero_point[4];
- XNN_ALIGN(8) float scale[2];
- } wasmsimd;
-#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
-};
-
typedef void (*xnn_ppmm_ukernel_function)(
size_t mr,
size_t nc,
@@ -4284,16 +1880,6 @@ typedef void (*xnn_init_qc8_scale_params_fn)(
const float scale[XNN_MIN_ELEMENTS(1)],
void* packed_w);
-// Forward declare to avoid circular includes between this and allocator.h.
-struct xnn_code_buffer;
-
-struct jit_gemm_params {
- struct {
- float min;
- float max;
- } f32_minmax;
-};
-
typedef enum xnn_status (*xnn_jit_gemm_code_generator_function)(
struct xnn_code_buffer *code, size_t max_mr, size_t nc, size_t kc, const void *params);
typedef enum xnn_status (*xnn_jit_igemm_code_generator_function)(
diff --git a/src/xnnpack/pavgpool.h b/src/xnnpack/pavgpool.h
index 828cf35bd..6c4d6ab75 100644
--- a/src/xnnpack/pavgpool.h
+++ b/src/xnnpack/pavgpool.h
@@ -8,8 +8,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/ppmm.h b/src/xnnpack/ppmm.h
index cec14872e..8379edfb9 100644
--- a/src/xnnpack/ppmm.h
+++ b/src/xnnpack/ppmm.h
@@ -8,8 +8,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/prelu.h b/src/xnnpack/prelu.h
index f3d999793..bb0d63706 100644
--- a/src/xnnpack/prelu.h
+++ b/src/xnnpack/prelu.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/raddexpminusmax.h b/src/xnnpack/raddexpminusmax.h
index 8f73d1939..ffc6407ed 100644
--- a/src/xnnpack/raddexpminusmax.h
+++ b/src/xnnpack/raddexpminusmax.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/raddextexp.h b/src/xnnpack/raddextexp.h
index 14381d1ab..f16e90be8 100644
--- a/src/xnnpack/raddextexp.h
+++ b/src/xnnpack/raddextexp.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/raddstoreexpminusmax.h b/src/xnnpack/raddstoreexpminusmax.h
index 62aecbad0..d1c487cb5 100644
--- a/src/xnnpack/raddstoreexpminusmax.h
+++ b/src/xnnpack/raddstoreexpminusmax.h
@@ -8,8 +8,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/requantization-stubs.h b/src/xnnpack/requantization-stubs.h
index 5da9597f7..39c3597c3 100644
--- a/src/xnnpack/requantization-stubs.h
+++ b/src/xnnpack/requantization-stubs.h
@@ -11,8 +11,6 @@
#include <stdint.h>
#include <stddef.h>
-#include <xnnpack/params.h>
-
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/src/xnnpack/requantization.h b/src/xnnpack/requantization.h
index a17a1b1b8..4fe12aedd 100644
--- a/src/xnnpack/requantization.h
+++ b/src/xnnpack/requantization.h
@@ -13,11 +13,9 @@
#include <assert.h>
#include <math.h>
-#include <fp16.h>
-
#include <xnnpack/common.h>
#include <xnnpack/math.h>
-#include <xnnpack/params.h>
+#include <xnnpack/microparams.h>
typedef int8_t (*xnn_qs8_requantize_fn)(
diff --git a/src/xnnpack/rmaxabs.h b/src/xnnpack/rmaxabs.h
index e69f53a07..400bc43b8 100644
--- a/src/xnnpack/rmaxabs.h
+++ b/src/xnnpack/rmaxabs.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/spmm.h b/src/xnnpack/spmm.h
index 7cccab23a..dfce62b4c 100644
--- a/src/xnnpack/spmm.h
+++ b/src/xnnpack/spmm.h
@@ -8,8 +8,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/transpose.h b/src/xnnpack/transpose.h
index 78a7dcc68..3634f6c8d 100644
--- a/src/xnnpack/transpose.h
+++ b/src/xnnpack/transpose.h
@@ -7,8 +7,8 @@
#include <stddef.h>
#include <stdint.h>
+
#include <xnnpack/common.h>
-#include <xnnpack/params.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/unpool.h b/src/xnnpack/unpool.h
index 875c54fdd..7aef9989e 100644
--- a/src/xnnpack/unpool.h
+++ b/src/xnnpack/unpool.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/vaddsub.h b/src/xnnpack/vaddsub.h
index c0c819507..c287522d8 100644
--- a/src/xnnpack/vaddsub.h
+++ b/src/xnnpack/vaddsub.h
@@ -11,8 +11,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/vbinary.h b/src/xnnpack/vbinary.h
index 8dbd4e938..930344f9c 100644
--- a/src/xnnpack/vbinary.h
+++ b/src/xnnpack/vbinary.h
@@ -11,8 +11,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/vcvt.h b/src/xnnpack/vcvt.h
index a6a0c500f..3ca9d53ac 100644
--- a/src/xnnpack/vcvt.h
+++ b/src/xnnpack/vcvt.h
@@ -8,8 +8,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/vlrelu.h b/src/xnnpack/vlrelu.h
index 35cfe5031..4c83e8e3b 100644
--- a/src/xnnpack/vlrelu.h
+++ b/src/xnnpack/vlrelu.h
@@ -8,8 +8,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/vlshift.h b/src/xnnpack/vlshift.h
index 271e28663..23481e514 100644
--- a/src/xnnpack/vlshift.h
+++ b/src/xnnpack/vlshift.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/vmul.h b/src/xnnpack/vmul.h
index c01a2d69e..3441a8cfe 100644
--- a/src/xnnpack/vmul.h
+++ b/src/xnnpack/vmul.h
@@ -8,8 +8,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/vmulcaddc.h b/src/xnnpack/vmulcaddc.h
index 05480f21d..0be7cf50d 100644
--- a/src/xnnpack/vmulcaddc.h
+++ b/src/xnnpack/vmulcaddc.h
@@ -8,8 +8,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/vscaleexpminusmax.h b/src/xnnpack/vscaleexpminusmax.h
index 2b0a407ce..afb864b79 100644
--- a/src/xnnpack/vscaleexpminusmax.h
+++ b/src/xnnpack/vscaleexpminusmax.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/vscaleextexp.h b/src/xnnpack/vscaleextexp.h
index 6433ed69a..c84365e69 100644
--- a/src/xnnpack/vscaleextexp.h
+++ b/src/xnnpack/vscaleextexp.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/vunary.h b/src/xnnpack/vunary.h
index 9847f9ca7..aee3576d9 100644
--- a/src/xnnpack/vunary.h
+++ b/src/xnnpack/vunary.h
@@ -8,8 +8,8 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
+#include <xnnpack/microparams.h>
#ifdef __cplusplus
extern "C" {
diff --git a/src/xnnpack/window.h b/src/xnnpack/window.h
index 58fd0f3ad..dea724691 100644
--- a/src/xnnpack/window.h
+++ b/src/xnnpack/window.h
@@ -8,7 +8,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus
diff --git a/src/xnnpack/zip.h b/src/xnnpack/zip.h
index 52a2fffe5..e617e69c6 100644
--- a/src/xnnpack/zip.h
+++ b/src/xnnpack/zip.h
@@ -11,7 +11,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <xnnpack/params.h>
#include <xnnpack/common.h>
#ifdef __cplusplus