diff options
author | Marat Dukhan <maratek@google.com> | 2022-07-27 21:14:38 -0700 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-07-27 21:15:38 -0700 |
commit | c836505ed4498a2ebd1c21050c383a0a60a8defc (patch) | |
tree | b26a80a0c5d6581794cc953414d7e05df7653ac9 /src/xnnpack | |
parent | 917e63588c2664a12417beb01e59f9e4a10251bc (diff) | |
download | XNNPACK-c836505ed4498a2ebd1c21050c383a0a60a8defc.tar.gz |
Refactor declarations of microkernel parameters
- Extract declarations of microkernel parameters into microparams.h
- Group and document microkernel parameters
- Rename params-init accordingly
- Make microkernels depend only on microparams.h and not params.h
PiperOrigin-RevId: 463747649
Diffstat (limited to 'src/xnnpack')
40 files changed, 2506 insertions, 2455 deletions
diff --git a/src/xnnpack/argmaxpool.h b/src/xnnpack/argmaxpool.h index 3366f0c1a..c900ce1aa 100644 --- a/src/xnnpack/argmaxpool.h +++ b/src/xnnpack/argmaxpool.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/avgpool.h b/src/xnnpack/avgpool.h index d766ac7b4..366986b79 100644 --- a/src/xnnpack/avgpool.h +++ b/src/xnnpack/avgpool.h @@ -11,8 +11,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/conv.h b/src/xnnpack/conv.h index 0b02beeaf..02a713417 100644 --- a/src/xnnpack/conv.h +++ b/src/xnnpack/conv.h @@ -11,8 +11,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/depthtospace.h b/src/xnnpack/depthtospace.h index 285fd3cbe..358b9df4c 100644 --- a/src/xnnpack/depthtospace.h +++ b/src/xnnpack/depthtospace.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/dwconv.h b/src/xnnpack/dwconv.h index 4b1464e1e..83ef6e13d 100644 --- a/src/xnnpack/dwconv.h +++ b/src/xnnpack/dwconv.h @@ -11,8 +11,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/fill.h b/src/xnnpack/fill.h index 0b62fea94..97cfd5007 100644 --- a/src/xnnpack/fill.h +++ b/src/xnnpack/fill.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/gavgpool.h b/src/xnnpack/gavgpool.h index fc5b153a4..bec595df8 100644 --- a/src/xnnpack/gavgpool.h +++ b/src/xnnpack/gavgpool.h @@ -11,8 
+11,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/gemm.h b/src/xnnpack/gemm.h index 519f51358..543ebfb10 100644 --- a/src/xnnpack/gemm.h +++ b/src/xnnpack/gemm.h @@ -11,8 +11,10 @@ #include <stddef.h> #include <stdint.h> +#include <xnnpack.h> // For xnn_status + #include <xnnpack/common.h> -#include <xnnpack/params.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/ibilinear.h b/src/xnnpack/ibilinear.h index 12ecb605c..2744e404b 100644 --- a/src/xnnpack/ibilinear.h +++ b/src/xnnpack/ibilinear.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/igemm.h b/src/xnnpack/igemm.h index 2ddd739f1..a79c3936d 100644 --- a/src/xnnpack/igemm.h +++ b/src/xnnpack/igemm.h @@ -11,8 +11,10 @@ #include <stddef.h> #include <stdint.h> +#include <xnnpack.h> // For xnn_status + #include <xnnpack/common.h> -#include <xnnpack/params.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/lut.h b/src/xnnpack/lut.h index f11954e01..57d36412b 100644 --- a/src/xnnpack/lut.h +++ b/src/xnnpack/lut.h @@ -11,7 +11,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/maxpool.h b/src/xnnpack/maxpool.h index 0310e77b7..a47c62531 100644 --- a/src/xnnpack/maxpool.h +++ b/src/xnnpack/maxpool.h @@ -11,8 +11,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/params-init.h b/src/xnnpack/microparams-init.h index 3e5aa121d..3e5aa121d 100644 --- a/src/xnnpack/params-init.h +++ b/src/xnnpack/microparams-init.h diff --git a/src/xnnpack/microparams.h 
b/src/xnnpack/microparams.h new file mode 100644 index 000000000..9c6c3bb41 --- /dev/null +++ b/src/xnnpack/microparams.h @@ -0,0 +1,2481 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <xnnpack/common.h>
+
+
+// Default: serves to differentiate pointer types for micro-kernels without fused activation.
+
+union xnn_f16_default_params {  // Carries no data: the dummy byte is the only member on every architecture.
+  char _;  // Dummy member variable to comply with the C standard
+};
+
+union xnn_f32_default_params {  // Dummy byte everywhere; x86/x86-64 builds additionally get the `avx` layout.
+  char _;  // Dummy member variable to comply with the C standard
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    int32_t mask_table[14];  // NOTE(review): presumably a lane-mask table for partial-vector loads/stores; confirm against the AVX kernels.
+  } avx;
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+
+// ReLU: serves to differentiate pointer types for micro-kernels with fused ReLU activation.
+
+union xnn_f32_relu_params {  // Carries no data: the dummy byte is the only member.
+  char _;  // Dummy member variable to comply with the C standard
+};
+
+
+// Scale+Min+Max: used by AVGPOOL/GAVGPOOL microkernels.
+
+union xnn_f16_scaleminmax_params {  // F16 scale/min/max; one layout per ISA family, no scalar layout.
+  char _;  // Dummy member variable to comply with the C standard
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    uint16_t scale;
+    uint16_t min;
+    uint16_t max;
+  } neon;  // One 16-bit value per parameter (presumably raw half-precision bit patterns; confirm).
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(32) float scale[8];
+    XNN_ALIGN(32) float min[8];
+    XNN_ALIGN(32) float max[8];
+  } avx;  // Each parameter is an 8-element float array declared with XNN_ALIGN(32).
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+union xnn_f32_scaleminmax_params {  // F32 scale/min/max; `scalar` is unconditional, `sse` exists only on x86/x86-64.
+  struct {
+    float scale;
+    float min;
+    float max;
+  } scalar;
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) float min[4];
+    XNN_ALIGN(16) float max[4];
+  } sse;  // Each parameter is a 4-element float array declared with XNN_ALIGN(16).
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+
+// Min+Max: used by VCLAMP and GEMM/IGEMM/DWCONV/MAXPOOL/etc with MINMAX activation.
+
+union xnn_f16_minmax_params {  // F16 min/max bounds; only per-ISA layouts plus the dummy byte (no scalar layout).
+  char _;  // Dummy member variable to comply with the C standard
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    uint16_t min;
+    uint16_t max;
+  } neon;  // One 16-bit value per bound (presumably raw half-precision bit patterns; confirm).
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(32) float min[8];
+    XNN_ALIGN(32) float max[8];
+  } avx;  // Bounds held as 8-element float arrays even though the union is the F16 one.
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+union xnn_f32_minmax_params {  // F32 min/max bounds; `scalar` is unconditional, the rest are guarded by XNN_ARCH_*.
+  struct {
+    float min;
+    float max;
+  } scalar;
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) float min[4];
+    XNN_ALIGN(16) float max[4];
+  } sse;
+  struct {
+    XNN_ALIGN(32) float min[8];
+    XNN_ALIGN(32) float max[8];
+    int32_t mask_table[14];  // NOTE(review): presumably a lane-mask table for partial-vector access; confirm against the AVX kernels.
+  } avx;
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) float min[2];
+    XNN_ALIGN(8) float max[2];
+  } wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_s8_minmax_params {  // Signed 8-bit min/max bounds; field widths differ per layout.
+  struct {
+    int32_t min;
+    int32_t max;
+  } scalar;  // Bounds widened to 32-bit integers.
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) uint8_t bias[16];
+    XNN_ALIGN(16) uint8_t min_with_bias[16];
+    XNN_ALIGN(16) uint8_t max_with_bias[16];
+  } sse2;  // Unlike the other layouts, stores unsigned bytes: a bias plus biased min/max.
+  struct {
+    XNN_ALIGN(16) int8_t min[16];
+    XNN_ALIGN(16) int8_t max[16];
+  } sse4;
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    int8_t min;
+    int8_t max;
+  } neon;
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) int8_t min[8];
+    XNN_ALIGN(8) int8_t max[8];
+  } wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_u8_minmax_params {  // Unsigned 8-bit min/max bounds; same shape as the S8 union minus the biased SSE2 form.
+  struct {
+    uint32_t min;
+    uint32_t max;
+  } scalar;  // Bounds widened to 32-bit unsigned integers.
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) uint8_t min[16];
+    XNN_ALIGN(16) uint8_t max[16];
+  } sse2;
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    uint8_t min;
+    uint8_t max;
+  } neon;
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) uint8_t min[8];
+    XNN_ALIGN(8) uint8_t max[8];
+  } wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// Conv Min+Max: used by quantized GEMM/IGEMM/DWCONV microkernels with MINMAX activation.
+
+union xnn_qs8_minmax_params {  // QS8 output-clamping parameters; no `scale` field, unlike xnn_qs8_conv_minmax_params.
+  struct {
+    float magic_bias;
+    int32_t magic_min;
+    int32_t magic_max;
+    int32_t magic_bias_less_zero_point;
+  } scalar_imagic;  // NOTE(review): the imagic/fmagic/lrintf suffixes presumably name the float-to-int rounding strategy; confirm against the scalar kernels.
+  struct {
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+  } scalar_fmagic;
+  struct {
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    int32_t output_zero_point;
+  } scalar_lrintf;
+#if XNN_ARCH_ARM
+  struct {
+    float magic_bias;
+    int32_t magic_bias_less_zero_point;
+    uint32_t output_min;
+    uint32_t output_max;
+  } armv6simd;  // Guarded by XNN_ARCH_ARM only, i.e. not compiled for ARM64.
+#endif  // XNN_ARCH_ARM
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+    int8_t output_min;
+    int8_t output_max;
+  } neon;
+  struct {
+    int16_t output_zero_point;
+    uint8_t output_min;  // NOTE(review): unsigned here, while the sibling `neon` layout of this signed union uses int8_t; confirm intended.
+    uint8_t output_max;
+  } neonv8;
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) float output_max_less_zero_point[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int16_t output_min[8];
+  } sse2;  // The x86 layouts hold only an upper bound less zero point, the zero point, and a lower bound.
+  struct {
+    XNN_ALIGN(16) float output_max_less_zero_point[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int8_t output_min[16];
+  } sse4;  // Same fields as `sse2`, but output_min is 16 x int8_t instead of 8 x int16_t.
+  struct {
+    XNN_ALIGN(32) float output_max_less_zero_point[8];
+    XNN_ALIGN(32) int16_t output_zero_point[16];
+    XNN_ALIGN(32) int8_t output_min[32];
+  } avx2;
+  struct {
+    XNN_ALIGN(64) float output_max_less_zero_point[16];
+    XNN_ALIGN(64) int16_t output_zero_point[32];
+    XNN_ALIGN(64) int8_t output_min[64];
+  } avx512;
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) float magic_bias[2];
+    XNN_ALIGN(8) int32_t magic_min[2];
+    XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+    XNN_ALIGN(8) int8_t output_max[8];
+  } wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_qs8_conv_minmax_params {  // QS8 requantization + clamping; `fp32_*` layouts carry a float scale, `rndnu_neon` carries shifts and a multiplier.
+  struct {
+    float scale;
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+  } fp32_scalar_fmagic;
+  struct {
+    float scale;
+    float magic_bias;
+    int32_t magic_min;
+    int32_t magic_max;
+    int32_t magic_bias_less_zero_point;
+  } fp32_scalar_imagic;
+  struct {
+    float scale;
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    int32_t output_zero_point;
+  } fp32_scalar_lrintf;
+#if XNN_ARCH_ARM
+  struct {
+    float scale;
+    float magic_bias;
+    int32_t magic_bias_less_zero_point;
+    uint32_t output_min;
+    uint32_t output_max;
+  } fp32_armv6simd;  // Guarded by XNN_ARCH_ARM only, i.e. not compiled for ARM64.
+#endif  // XNN_ARCH_ARM
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    float scale;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+    int8_t output_min;
+    int8_t output_max;
+  } fp32_neon;
+  struct {
+    float scale;
+    int16_t output_zero_point;
+    int8_t output_min;
+    int8_t output_max;
+  } fp32_neonv8;
+  struct {
+    int32_t right_pre_shift;
+    int32_t multiplier;
+    int32_t right_post_shift;
+    int16_t output_zero_point;
+    int8_t output_min;
+    int8_t output_max;
+  } rndnu_neon;  // Integer-only layout: pre-shift, multiplier, post-shift instead of a float scale.
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) float output_max_less_zero_point[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int16_t output_min[8];
+  } fp32_sse2;
+  struct {
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) float output_max_less_zero_point[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int8_t output_min[16];
+  } fp32_sse4;
+  struct {
+    XNN_ALIGN(32) float scale[8];
+    XNN_ALIGN(32) float output_max_less_zero_point[8];
+    XNN_ALIGN(32) int16_t output_zero_point[16];
+    XNN_ALIGN(32) int8_t output_min[32];
+  } fp32_avx2;
+  struct {
+    XNN_ALIGN(64) float scale[16];
+    XNN_ALIGN(64) float output_max_less_zero_point[16];
+    XNN_ALIGN(64) int16_t output_zero_point[32];
+    XNN_ALIGN(64) int8_t output_min[64];
+  } fp32_avx512;
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) float scale[2];
+    XNN_ALIGN(8) float magic_bias[2];
+    XNN_ALIGN(8) int32_t magic_min[2];
+    XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+    XNN_ALIGN(8) int8_t output_max[8];
+  } fp32_wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_qu8_conv_minmax_params {  // QU8 counterpart of xnn_qs8_conv_minmax_params; every layout also carries a kernel zero point.
+  struct {
+    int32_t kernel_zero_point;
+    float scale;
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+  } fp32_scalar_fmagic;
+  struct {
+    int32_t kernel_zero_point;
+    float scale;
+    float magic_bias;
+    int32_t magic_min;
+    int32_t magic_max;
+    int32_t magic_bias_less_zero_point;
+  } fp32_scalar_imagic;
+  struct {
+    int32_t kernel_zero_point;
+    float scale;
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    int32_t output_zero_point;
+  } fp32_scalar_lrintf;
+#if XNN_ARCH_ARM
+  struct {
+    float scale;
+    float magic_bias;
+    uint32_t minus_kernel_zero_point;  // This layout stores the field as `minus_kernel_zero_point` rather than `kernel_zero_point`.
+    int32_t magic_bias_less_zero_point;
+    uint32_t output_min;
+    uint32_t output_max;
+  } fp32_armv6simd;
+#endif  // XNN_ARCH_ARM
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    uint8_t kernel_zero_point[4];
+    float scale;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+    uint8_t output_min;
+    uint8_t output_max;
+  } fp32_neon;
+  struct {
+    uint8_t kernel_zero_point[4];
+    float scale;
+    int16_t output_zero_point;
+    uint8_t output_min;
+    uint8_t output_max;
+  } fp32_neonv8;
+  struct {
+    uint8_t kernel_zero_point[4];
+    int32_t right_pre_shift;
+    int32_t multiplier;
+    int32_t right_post_shift;
+    int16_t output_zero_point;
+    uint8_t output_min;
+    uint8_t output_max;
+  } rndnu_neon;  // Integer-only layout: pre-shift, multiplier, post-shift instead of a float scale.
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) int16_t kernel_zero_point[8];
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) float output_max_less_zero_point[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) uint8_t output_min[16];
+  } fp32_sse2;  // There is no separate fp32_sse4 layout in this union, unlike the QS8 one.
+  struct {
+    XNN_ALIGN(32) int16_t kernel_zero_point[16];
+    XNN_ALIGN(32) float scale[8];
+    XNN_ALIGN(32) float output_max_less_zero_point[8];
+    XNN_ALIGN(32) int16_t output_zero_point[16];
+    XNN_ALIGN(32) uint8_t output_min[32];
+  } fp32_avx2;
+  struct {
+    XNN_ALIGN(64) int16_t kernel_zero_point[32];
+    XNN_ALIGN(64) float scale[16];
+    XNN_ALIGN(64) float output_max_less_zero_point[16];
+    XNN_ALIGN(64) int16_t output_zero_point[32];
+    XNN_ALIGN(64) uint8_t output_min[64];
+  } fp32_avx512;
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) int16_t kernel_zero_point[4];
+    XNN_ALIGN(8) float scale[2];
+    XNN_ALIGN(8) float magic_bias[2];
+    XNN_ALIGN(8) int32_t magic_min[2];
+    XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+    XNN_ALIGN(8) int8_t output_max[8];  // NOTE(review): int8_t in a QU8 union whose other layouts use uint8_t bounds; confirm intended.
+  } fp32_wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// Add/Sub Min+Max: used by quantized VADD[C] microkernels with MINMAX activation.
+
+union xnn_qs8_addsub_minmax_params {  // QS8 add/sub parameters: per-operand multipliers, a shift, and output clamping; one layout per ISA.
+  struct {
+    int32_t bias;
+    int32_t a_multiplier;
+    int32_t b_multiplier;
+    uint32_t shift;
+    int32_t output_min_less_zero_point;
+    int32_t output_max_less_zero_point;
+    int32_t output_zero_point;
+  } scalar;  // Always available; every field is a 32-bit integer.
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    int8_t a_zero_point;
+    int8_t b_zero_point;
+    int16_t output_zero_point;
+    int32_t a_multiplier;
+    int32_t b_multiplier;
+    int32_t right_shift;
+    int8_t output_min;
+    int8_t output_max;
+  } neon;  // Stores the operand zero points separately instead of a combined `bias`.
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) int32_t bias[4];
+    XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
+    XNN_ALIGN(16) uint16_t a_multiplier_hi[8];
+    XNN_ALIGN(16) uint16_t b_multiplier_lo[8];
+    XNN_ALIGN(16) uint16_t b_multiplier_hi[8];
+    uint32_t shift;
+    uint32_t b_multiplier;
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int16_t output_min[8];
+    XNN_ALIGN(16) int16_t output_max[8];
+  } sse2;  // Multipliers are split into 16-bit lo/hi halves; b_multiplier is also kept whole.
+  struct {
+    XNN_ALIGN(16) int32_t bias[4];
+    XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
+    XNN_ALIGN(16) uint16_t a_multiplier_hi[8];
+    XNN_ALIGN(16) uint16_t b_multiplier_lo[8];
+    XNN_ALIGN(16) uint16_t b_multiplier_hi[8];
+    uint32_t shift;
+    uint32_t b_multiplier;
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int8_t output_min[16];
+    XNN_ALIGN(16) int8_t output_max[16];
+  } sse4_mul16;  // Same as `sse2` except the bounds are 16 x int8_t rather than 8 x int16_t.
+  struct {
+    XNN_ALIGN(16) int32_t bias[4];
+    XNN_ALIGN(16) int32_t a_multiplier[4];
+    XNN_ALIGN(16) int32_t b_multiplier[4];
+    XNN_ALIGN(16) uint64_t shift[2];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int8_t output_min[16];
+    XNN_ALIGN(16) int8_t output_max[16];
+  } sse4_mul32;  // Whole 32-bit multipliers; shift stored as 64-bit elements.
+  struct {
+    XNN_ALIGN(32) int32_t bias[8];
+    XNN_ALIGN(32) int32_t a_multiplier[8];
+    XNN_ALIGN(32) int32_t b_multiplier[8];
+    XNN_ALIGN(32) uint64_t shift[4];
+    XNN_ALIGN(32) int16_t output_zero_point[16];
+    XNN_ALIGN(16) int8_t output_min[16];
+    XNN_ALIGN(16) int8_t output_max[16];
+  } avx2;  // Bounds stay 16-byte arrays with XNN_ALIGN(16) while the other fields use XNN_ALIGN(32).
+  struct {
+    XNN_ALIGN(64) int32_t bias[16];
+    XNN_ALIGN(64) int32_t a_multiplier[16];
+    XNN_ALIGN(64) int32_t b_multiplier[16];
+    XNN_ALIGN(64) uint64_t shift[8];
+    XNN_ALIGN(64) int16_t output_zero_point[32];
+    XNN_ALIGN(32) int8_t output_min[32];
+    XNN_ALIGN(32) int8_t output_max[32];
+  } avx512;  // Bounds are 32-byte arrays with XNN_ALIGN(32) while the other fields use XNN_ALIGN(64).
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) int32_t bias[2];
+    XNN_ALIGN(8) int32_t a_multiplier[2];
+    XNN_ALIGN(8) int32_t b_multiplier[2];
+    uint32_t shift;
+    XNN_ALIGN(8) int16_t output_zero_point[4];
+    XNN_ALIGN(8) int8_t output_min[8];
+    XNN_ALIGN(8) int8_t output_max[8];
+  } wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_qu8_addsub_minmax_params {  // QU8 counterpart of xnn_qs8_addsub_minmax_params with unsigned 8-bit bounds.
+  struct {
+    int32_t bias;
+    int32_t a_multiplier;
+    int32_t b_multiplier;
+    int32_t rounding;  // Present only in the QU8 scalar layout; the QS8 scalar layout has no such field.
+    uint32_t shift;
+    int32_t output_min_less_zero_point;
+    int32_t output_max_less_zero_point;
+    int32_t output_zero_point;
+  } scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    uint8_t a_zero_point;
+    uint8_t b_zero_point;
+    int16_t output_zero_point;
+    int32_t a_multiplier;
+    int32_t b_multiplier;
+    int32_t right_shift;
+    uint8_t output_min;
+    uint8_t output_max;
+  } neon;
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) int32_t bias[4];
+    XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
+    XNN_ALIGN(16) uint16_t a_multiplier_hi[8];
+    XNN_ALIGN(16) uint16_t b_multiplier_lo[8];
+    XNN_ALIGN(16) uint16_t b_multiplier_hi[8];
+    uint32_t shift;
+    uint32_t b_multiplier;
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) uint8_t output_min[16];
+    XNN_ALIGN(16) uint8_t output_max[16];
+  } sse2;  // Multipliers are split into 16-bit lo/hi halves; b_multiplier is also kept whole.
+  struct {
+    XNN_ALIGN(16) int32_t bias[4];
+    XNN_ALIGN(16) int32_t a_multiplier[4];
+    XNN_ALIGN(16) int32_t b_multiplier[4];
+    XNN_ALIGN(16) uint64_t shift[2];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) uint8_t output_min[16];
+    XNN_ALIGN(16) uint8_t output_max[16];
+  } sse4;  // Single SSE4 layout here; the QS8 union has both sse4_mul16 and sse4_mul32.
+  struct {
+    XNN_ALIGN(32) int32_t bias[8];
+    XNN_ALIGN(32) int32_t a_multiplier[8];
+    XNN_ALIGN(32) int32_t b_multiplier[8];
+    XNN_ALIGN(32) uint64_t shift[4];
+    XNN_ALIGN(32) int16_t output_zero_point[16];
+    XNN_ALIGN(16) uint8_t output_min[16];
+    XNN_ALIGN(16) uint8_t output_max[16];
+  } avx2;
+  struct {
+    XNN_ALIGN(64) int32_t bias[16];
+    XNN_ALIGN(64) int32_t a_multiplier[16];
+    XNN_ALIGN(64) int32_t b_multiplier[16];
+    XNN_ALIGN(64) uint64_t shift[8];
+    XNN_ALIGN(64) int16_t output_zero_point[32];
+    XNN_ALIGN(32) uint8_t output_min[32];
+    XNN_ALIGN(32) uint8_t output_max[32];
+  } avx512;
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) int32_t bias[2];
+    XNN_ALIGN(8) int32_t a_multiplier[2];
+    XNN_ALIGN(8) int32_t b_multiplier[2];
+    uint32_t shift;
+    XNN_ALIGN(8) int16_t output_zero_point[4];
+    XNN_ALIGN(8) uint8_t output_min[8];
+    XNN_ALIGN(8) uint8_t output_max[8];
+  } wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// Mul Min+Max: used by quantized VMUL[C] microkernels with MINMAX activation.
+
+union xnn_qs8_mul_minmax_params {  // QS8 multiply parameters: operand zero points, requantization values, and output clamping.
+  struct {
+    int32_t a_zero_point;
+    int32_t b_zero_point;
+    float scale;
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+  } fp32_scalar;  // The only unconditional layout; there are no separate fmagic/imagic/lrintf scalar layouts here.
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    int8_t a_zero_point[2];
+    int8_t b_zero_point[2];
+    float scale;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+    int8_t output_min;
+    int8_t output_max;
+  } fp32_neon;
+  struct {
+    int8_t a_zero_point[2];
+    int8_t b_zero_point[2];
+    float scale;
+    int16_t output_zero_point;
+    int8_t output_min;
+    int8_t output_max;
+  } fp32_neonv8;
+  struct {
+    int8_t a_zero_point[2];
+    int8_t b_zero_point[2];
+    int32_t left_pre_shift;
+    int32_t multiplier;
+    int32_t left_post_shift;
+    int16_t output_zero_point;
+    int8_t output_min;
+    int8_t output_max;
+  } rndnu_neon;  // Integer-only layout; the shift fields are named left_* here.
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) int16_t a_zero_point[8];
+    XNN_ALIGN(16) int16_t b_zero_point[8];
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int16_t output_min[8];
+    XNN_ALIGN(16) int16_t output_max[8];
+  } fp32_sse2;
+  struct {
+    XNN_ALIGN(16) int16_t a_zero_point[8];
+    XNN_ALIGN(16) int16_t b_zero_point[8];
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int8_t output_min[16];
+    XNN_ALIGN(16) int8_t output_max[16];
+  } fp32_sse4;  // Same as `fp32_sse2` except the bounds are 16 x int8_t rather than 8 x int16_t.
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) int16_t a_zero_point[4];
+    XNN_ALIGN(8) int16_t b_zero_point[4];
+    XNN_ALIGN(8) float scale[2];
+    XNN_ALIGN(8) float magic_bias[2];
+    XNN_ALIGN(8) int32_t magic_min[2];
+    XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+    XNN_ALIGN(8) int8_t output_max[8];
+  } fp32_wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_qu8_mul_minmax_params {  // QU8 counterpart of xnn_qs8_mul_minmax_params with unsigned 8-bit zero points and bounds.
+  struct {
+    int32_t a_zero_point;
+    int32_t b_zero_point;
+    float scale;
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+  } fp32_scalar;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    uint8_t a_zero_point[2];
+    uint8_t b_zero_point[2];
+    float scale;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+    uint8_t output_min;
+    uint8_t output_max;
+  } fp32_neon;
+  struct {
+    uint8_t a_zero_point[2];
+    uint8_t b_zero_point[2];
+    float scale;
+    int16_t output_zero_point;
+    uint8_t output_min;
+    uint8_t output_max;
+  } fp32_neonv8;
+  struct {
+    uint8_t a_zero_point[2];
+    uint8_t b_zero_point[2];
+    int32_t left_pre_shift;
+    int32_t multiplier;
+    int32_t left_post_shift;
+    int16_t output_zero_point;
+    uint8_t output_min;
+    uint8_t output_max;
+  } rndnu_neon;  // Integer-only layout; the shift fields are named left_* here.
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) int16_t a_zero_point[8];
+    XNN_ALIGN(16) int16_t b_zero_point[8];
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) uint8_t output_min[16];
+    XNN_ALIGN(16) uint8_t output_max[16];
+  } fp32_sse2;  // The only x86 layout in this union (the QS8 union also has fp32_sse4).
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) int16_t a_zero_point[4];
+    XNN_ALIGN(8) int16_t b_zero_point[4];
+    XNN_ALIGN(8) float scale[2];
+    XNN_ALIGN(8) float magic_bias[2];
+    XNN_ALIGN(8) int32_t magic_min[2];
+    XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+    XNN_ALIGN(8) uint8_t output_max[8];
+  } fp32_wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// AvgPool Min+Max: used by quantized GAVGPOOL microkernels with MINMAX activation.
+
+union xnn_qs8_avgpool_minmax_params {  // QS8 average-pooling parameters: an initial bias, requantization values, and output clamping.
+  struct {
+    int32_t init_bias;
+    float scale;
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+  } fp32_scalar_fmagic;
+  struct {
+    int32_t init_bias;
+    float scale;
+    float magic_bias;
+    int32_t magic_min;
+    int32_t magic_max;
+    int32_t magic_bias_less_zero_point;
+  } fp32_scalar_imagic;
+  struct {
+    int32_t init_bias;
+    float scale;
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    int32_t output_zero_point;
+  } fp32_scalar_lrintf;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    int32_t init_bias;
+    float scale;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+    int8_t output_min;
+    int8_t output_max;
+  } fp32_neon;
+  struct {
+    int32_t init_bias;
+    float scale;
+    int16_t output_zero_point;
+    int8_t output_min;
+    int8_t output_max;
+  } fp32_neonv8;
+  struct {
+    int32_t init_bias;
+    int32_t left_pre_shift;
+    int32_t multiplier;
+    int32_t left_post_shift;
+    int16_t output_zero_point;
+    int8_t output_min;
+    int8_t output_max;
+  } rndnu_neon;  // Integer-only layout: shifts and a multiplier instead of a float scale.
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) int32_t init_bias[4];
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) float output_max_less_zero_point[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int16_t output_min[8];
+  } fp32_sse2;
+  struct {
+    XNN_ALIGN(16) int32_t init_bias[4];
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) float output_max_less_zero_point[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int8_t output_min[16];
+  } fp32_sse4;  // Same as `fp32_sse2` except output_min is 16 x int8_t rather than 8 x int16_t.
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) int32_t init_bias[2];
+    XNN_ALIGN(8) float scale[2];
+    XNN_ALIGN(8) float magic_bias[2];
+    XNN_ALIGN(8) int32_t magic_min[2];
+    XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+    XNN_ALIGN(8) int8_t output_max[8];
+  } fp32_wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+union xnn_qu8_avgpool_minmax_params {  // QU8 counterpart; also holds the legacy integer layouts (`scalar`, `neon`, `sse2`) at the end.
+  struct {
+    int32_t init_bias;
+    float scale;
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+  } fp32_scalar_fmagic;
+  struct {
+    int32_t init_bias;
+    float scale;
+    float magic_bias;
+    int32_t magic_min;
+    int32_t magic_max;
+    int32_t magic_bias_less_zero_point;
+  } fp32_scalar_imagic;
+  struct {
+    int32_t init_bias;
+    float scale;
+    float output_min_less_zero_point;
+    float output_max_less_zero_point;
+    int32_t output_zero_point;
+  } fp32_scalar_lrintf;
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    int32_t init_bias;
+    float scale;
+    float magic_bias;
+    int32_t magic_bias_less_output_zero_point;
+    uint8_t output_min;
+    uint8_t output_max;
+  } fp32_neon;
+  struct {
+    int32_t init_bias;
+    float scale;
+    int16_t output_zero_point;
+    uint8_t output_min;
+    uint8_t output_max;
+  } fp32_neonv8;
+  struct {
+    int32_t init_bias;
+    int32_t left_pre_shift;
+    int32_t multiplier;
+    int32_t left_post_shift;
+    int16_t output_zero_point;
+    uint8_t output_min;
+    uint8_t output_max;
+  } rndnu_neon;  // Integer-only layout: shifts and a multiplier instead of a float scale.
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) int32_t init_bias[4];
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) float output_max_less_zero_point[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) uint8_t output_min[16];
+  } fp32_sse2;
+  struct {
+    XNN_ALIGN(16) int32_t init_bias[4];
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) float output_max_less_zero_point[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) uint8_t output_min[16];
+  } fp32_sse4;  // Field-for-field identical to `fp32_sse2` in this union.
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) int32_t init_bias[2];
+    XNN_ALIGN(8) float scale[2];
+    XNN_ALIGN(8) float magic_bias[2];
+    XNN_ALIGN(8) int32_t magic_min[2];
+    XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2];
+    XNN_ALIGN(8) uint8_t output_max[8];
+  } fp32_wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+
+  // Legacy parameters used by QU8 AVGPOOL microkernels
+  struct {
+    int32_t bias;
+    int32_t multiplier;
+    int64_t rounding;
+    uint32_t right_shift;
+    int32_t output_min_less_zero_point;
+    int32_t output_max_less_zero_point;
+    int32_t output_zero_point;
+  } scalar;  // Legacy fixed-point layout: multiplier, 64-bit rounding term, right shift.
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  struct {
+    int32_t bias;
+    int32_t multiplier;
+    int64_t left_shift;  // Named left_shift here, whereas the legacy scalar and sse2 layouts store right_shift.
+    int16_t output_zero_point;
+    uint8_t output_min;
+    uint8_t output_max;
+  } neon;
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) int32_t bias[4];
+    XNN_ALIGN(16) uint32_t multiplier[4];
+    XNN_ALIGN(16) uint64_t rounding[2];
+    XNN_ALIGN(16) uint64_t right_shift[2];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) uint8_t output_min[16];
+    XNN_ALIGN(16) uint8_t output_max[16];
+  } sse2;
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+
+// Abs: used by VABS microkernels.
+
+union xnn_f16_abs_params {  // F16 absolute-value parameters; only an x86 `sse` layout besides the dummy byte.
+  char _;  // Dummy member variable to comply with the C standard
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) uint16_t nonsign_mask[8];
+  } sse;  // NOTE(review): presumably a mask with every bit except the sign bit set; confirm against the initializer.
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+};
+
+union xnn_f32_abs_params {  // F32 absolute-value parameters; each layout stores a `nonsign_mask` in a different width.
+  char _;  // Dummy member variable to comply with the C standard
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  struct {
+    XNN_ALIGN(16) float nonsign_mask[4];
+  } sse;
+  struct {
+    XNN_ALIGN(32) float nonsign_mask[8];
+    int32_t mask_table[14];  // NOTE(review): presumably a lane-mask table for partial-vector access; confirm against the AVX kernels.
+  } avx;
+  struct {
+    uint32_t nonsign_mask;
+  } avx512;  // A single 32-bit integer mask rather than a float array.
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+  struct {
+    XNN_ALIGN(8) float nonsign_mask[2];
+  } wasmsimd;
+#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
+};
+
+
+// Cvt (Convert): used by VCVT microkernels.

// Parameter block for the F16 -> F32 conversion microkernels (going by the
// union name). Each member struct belongs to one kernel flavour; the SIMD
// flavours are declared only when the matching XNN_ARCH_* macro is non-zero,
// and they keep every field in an XNN_ALIGN-ed array.
union xnn_f16_f32_cvt_params {
  // Always available.
  struct {
    uint32_t sign_mask;
    uint32_t exp_offset;
    float exp_scale;
    uint32_t magic_mask;
    float magic_bias;
    uint32_t denorm_cutoff;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    float exp_scale;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // 16-bit lane layout.
  struct {
    XNN_ALIGN(16) uint16_t sign_mask[8];
    XNN_ALIGN(16) uint16_t exp_offset[8];
    XNN_ALIGN(16) float exp_scale[4];
    XNN_ALIGN(16) uint16_t magic_mask[8];
    XNN_ALIGN(16) float magic_bias[4];
    XNN_ALIGN(16) int16_t denorm_cutoff[8];
  } sse_int16;
  // 32-bit lane layout; note there is no magic_mask and magic_bias is stored
  // as uint32_t here.
  struct {
    XNN_ALIGN(16) uint32_t sign_mask[4];
    XNN_ALIGN(16) uint32_t exp_offset[4];
    XNN_ALIGN(16) float exp_scale[4];
    XNN_ALIGN(16) uint32_t magic_bias[4];
    XNN_ALIGN(16) int32_t denorm_cutoff[4];
  } sse_int32;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // 16-bit lane layout.
  struct {
    XNN_ALIGN(8) uint16_t sign_mask[4];
    XNN_ALIGN(8) uint16_t exp_offset[4];
    XNN_ALIGN(8) float exp_scale[2];
    XNN_ALIGN(8) uint16_t magic_mask[4];
    XNN_ALIGN(8) float magic_bias[2];
    XNN_ALIGN(8) int16_t denorm_cutoff[4];
  } wasmsimd_int16;
  // 32-bit lane layout; no magic_mask, magic_bias stored as uint32_t.
  struct {
    XNN_ALIGN(8) uint32_t sign_mask[2];
    XNN_ALIGN(8) uint32_t exp_offset[2];
    XNN_ALIGN(8) float exp_scale[2];
    XNN_ALIGN(8) uint32_t magic_bias[2];
    XNN_ALIGN(8) int32_t denorm_cutoff[2];
  } wasmsimd_int32;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};

// Parameter block for the F32 -> F16 conversion microkernels (going by the
// union name). The two scalar flavours hold the same fields in a different
// order, so they are not interchangeable.
union xnn_f32_f16_cvt_params {
  struct {
    uint32_t nonsign_mask;
    uint32_t exp_bias;
    float scale_to_inf;
    uint32_t expw_max;
    float scale_to_zero;
    uint32_t bias_min;
    uint16_t exph_mask;
    uint16_t manth_mask;
    uint16_t nanh;
  } scalar_bitcast;
  // Same fields as scalar_bitcast minus nonsign_mask, in a different order.
  struct {
    float scale_to_inf;
    uint32_t exp_bias;
    float scale_to_zero;
    uint32_t expw_max;
    uint32_t bias_min;
    uint16_t exph_mask;
    uint16_t manth_mask;
    uint16_t nanh;
  } scalar_fabsf;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    uint32_t exp_bias;
    float scale_to_inf;
    uint32_t expw_max;
    float scale_to_zero;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) uint32_t nonsign_mask[4];
    XNN_ALIGN(16) uint32_t exp_bias[4];
    XNN_ALIGN(16) float scale_to_inf[4];
    XNN_ALIGN(16) uint32_t expw_max[4];
    XNN_ALIGN(16) float scale_to_zero[4];
    XNN_ALIGN(16) int16_t bias_min[8];
    XNN_ALIGN(16) uint32_t manth_mask[4];
    XNN_ALIGN(16) uint32_t exph_mask[4];
    XNN_ALIGN(16) uint16_t nanh[8];
  } sse2;
  // NOTE(review): mask_table presumably drives masked handling of remainder
  // elements -- confirm against the F16C kernels.
  struct {
    int32_t mask_table[14];
  } f16c;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) uint32_t exp_bias[2];
    XNN_ALIGN(8) float scale_to_inf[2];
    XNN_ALIGN(8) uint32_t expw_max[2];
    XNN_ALIGN(8) float scale_to_zero[2];
    XNN_ALIGN(8) int16_t bias_min[4];
    XNN_ALIGN(8) uint32_t manth_mask[2];
    XNN_ALIGN(8) uint32_t exph_mask[2];
    XNN_ALIGN(8) uint16_t nanh[4];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};

// Parameter block for the F32 -> QS8 conversion microkernels (going by the
// union name). Output bounds are stored as int8_t, except in the sse2 flavour
// where output_min is int16_t.
union xnn_f32_qs8_cvt_params {
  struct {
    float scale;
    float output_min_less_zero_point;
    float output_max_less_zero_point;
    float magic_bias;
    int32_t magic_bias_less_zero_point;
  } scalar_fmagic;
  struct {
    float scale;
    float magic_bias;
    int32_t magic_min;
    int32_t magic_max;
    int32_t magic_bias_less_zero_point;
  } scalar_imagic;
  struct {
    float scale;
    float output_min_less_zero_point;
    float output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar_lrintf;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    float scale;
    float magic_bias;
    int32_t magic_bias_less_zero_point;
    int8_t output_min;
    int8_t output_max;
  } neon;
  struct {
    float scale;
    int16_t output_zero_point;
    int8_t output_min;
    int8_t output_max;
  } neonv8;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // output_min is kept as 16-bit lanes in this flavour.
  struct {
    XNN_ALIGN(16) float scale[4];
    XNN_ALIGN(16) float output_max_less_zero_point[4];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int16_t output_min[8];
  } sse2;
  struct {
    XNN_ALIGN(16) float scale[4];
    XNN_ALIGN(16) float output_max_less_zero_point[4];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int8_t output_min[16];
  } sse4;
  struct {
    XNN_ALIGN(32) float scale[8];
    XNN_ALIGN(32) float output_max_less_zero_point[8];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) int8_t output_min[16];
    int32_t mask_table[14];
  } avx;
  struct {
    XNN_ALIGN(32) float scale[8];
    XNN_ALIGN(32) float output_max_less_zero_point[8];
    XNN_ALIGN(32) int16_t output_zero_point[16];
    XNN_ALIGN(32) uint32_t shuffle_mask[8];
    XNN_ALIGN(32) int8_t output_min[32];
    int32_t mask_table[14];
  } avx2;
  struct {
    XNN_ALIGN(64) float scale[16];
    XNN_ALIGN(64) float output_max_less_zero_point[16];
    XNN_ALIGN(64) int16_t output_zero_point[32];
    XNN_ALIGN(64) int8_t output_min[64];
    XNN_ALIGN(64) uint32_t shuffle512_mask[16];
    XNN_ALIGN(32) uint32_t shuffle256_mask[8];
  } avx512;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) float scale[2];
    XNN_ALIGN(8) int16_t output_zero_point[4];
    XNN_ALIGN(8) int8_t output_min[8];
    XNN_ALIGN(8) int8_t output_max[8];
  } wasmsimd_cvt;
  struct {
    XNN_ALIGN(8) float scale[2];
    XNN_ALIGN(8) float magic_bias[2];
    XNN_ALIGN(8) int32_t magic_min[2];
    XNN_ALIGN(8) int32_t magic_bias_less_zero_point[2];
    XNN_ALIGN(8) int8_t output_max[8];
  } wasmsimd_magic;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};

// Parameter block for the F32 -> QU8 conversion microkernels (going by the
// union name). Mirrors the QS8 block with uint8_t output bounds; there is no
// sse4 flavour here.
union xnn_f32_qu8_cvt_params {
  struct {
    float scale;
    float output_min_less_zero_point;
    float output_max_less_zero_point;
    float magic_bias;
    int32_t magic_bias_less_zero_point;
  } scalar_fmagic;
  struct {
    float scale;
    float magic_bias;
    int32_t magic_min;
    int32_t magic_max;
    int32_t magic_bias_less_zero_point;
  } scalar_imagic;
  struct {
    float scale;
    float output_min_less_zero_point;
    float output_max_less_zero_point;
    int32_t output_zero_point;
  } scalar_lrintf;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    float scale;
    float magic_bias;
    int32_t magic_bias_less_zero_point;
    uint8_t output_min;
    uint8_t output_max;
  } neon;
  struct {
    float scale;
    int16_t output_zero_point;
    uint8_t output_min;
    uint8_t output_max;
  } neonv8;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float scale[4];
    XNN_ALIGN(16) float output_max_less_zero_point[4];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) uint8_t output_min[16];
  } sse2;
  struct {
    XNN_ALIGN(32) float scale[8];
    XNN_ALIGN(32) float output_max_less_zero_point[8];
    XNN_ALIGN(16) int16_t output_zero_point[8];
    XNN_ALIGN(16) uint8_t output_min[16];
    int32_t mask_table[14];
  } avx;
  struct {
    XNN_ALIGN(32) float scale[8];
    XNN_ALIGN(32) float output_max_less_zero_point[8];
    XNN_ALIGN(32) int16_t output_zero_point[16];
    XNN_ALIGN(32) uint32_t shuffle_mask[8];
    XNN_ALIGN(32) uint8_t output_min[32];
    int32_t mask_table[14];
  } avx2;
  struct {
    XNN_ALIGN(64) float scale[16];
    XNN_ALIGN(64) float output_max_less_zero_point[16];
    XNN_ALIGN(64) int16_t output_zero_point[32];
    XNN_ALIGN(64) uint8_t output_min[64];
    XNN_ALIGN(64) uint32_t shuffle512_mask[16];
    XNN_ALIGN(32) uint32_t shuffle256_mask[8];
  } avx512;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) float scale[2];
    XNN_ALIGN(8) int16_t output_zero_point[4];
    XNN_ALIGN(8) uint8_t output_min[8];
    XNN_ALIGN(8) uint8_t output_max[8];
  } wasmsimd_cvt;
  struct {
    XNN_ALIGN(8) float scale[2];
    XNN_ALIGN(8) float magic_bias[2];
    XNN_ALIGN(8) int32_t magic_min[2];
    XNN_ALIGN(8) int32_t magic_bias_less_zero_point[2];
    XNN_ALIGN(8) uint8_t output_max[8];
  } wasmsimd_magic;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};

// Parameter block for the QS8 -> QS8 conversion microkernels (going by the
// union name). Each member struct belongs to one kernel flavour; the SIMD
// flavours are declared only when the matching XNN_ARCH_* macro is non-zero.
union xnn_qs8_cvt_params {
  struct {
    int32_t bias;
    int32_t multiplier;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    uint32_t minus_input_zero_point;
    int32_t multiplier;
    int32_t bias;
  } armv6simd;
  struct {
    int16_t input_zero_point;
    int16_t multiplier;
    int16_t output_zero_point;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) int16_t multiplier[8];
    XNN_ALIGN(16) int32_t bias[4];
  } sse2;
  struct {
    XNN_ALIGN(16) int16_t input_zero_point[8];
    XNN_ALIGN(16) int16_t multiplier[8];
    XNN_ALIGN(16) int16_t output_zero_point[8];
  } ssse3;
  struct {
    XNN_ALIGN(32) int16_t input_zero_point[16];
    XNN_ALIGN(32) int16_t multiplier[16];
    XNN_ALIGN(32) int16_t output_zero_point[16];
  } avx2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) int16_t input_zero_point[4];
    XNN_ALIGN(8) int16_t multiplier[4];
    XNN_ALIGN(8) int16_t output_zero_point[4];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};

// Parameter block for the QS8 -> F32 conversion microkernels (going by the
// union name).
union xnn_qs8_f32_cvt_params {
  struct {
    int32_t zero_point;
    float scale;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    int16_t minus_zero_point[2];
    float scale;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) uint8_t sign_mask[16];
    XNN_ALIGN(16) uint16_t magic_exp[8];
    XNN_ALIGN(16) float magic_bias[4];
    XNN_ALIGN(16) float scale[4];
  } sse2;
  struct {
    XNN_ALIGN(16) int32_t minus_zero_point[4];
    XNN_ALIGN(16) float scale[4];
  } sse4;
  struct {
    XNN_ALIGN(32) int32_t minus_zero_point[8];
    XNN_ALIGN(32) float scale[8];
  } avx;
  struct {
    XNN_ALIGN(64) int32_t minus_zero_point[16];
    XNN_ALIGN(64) float scale[16];
  } avx512;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) int16_t minus_zero_point[4];
    XNN_ALIGN(8) float scale[2];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};

// Parameter block for the QU8 -> QU8 conversion microkernels (going by the
// union name). Same shape as the QS8 block, with unsigned input zero points
// and an unsigned sse2 multiplier.
union xnn_qu8_cvt_params {
  struct {
    int32_t bias;
    int32_t multiplier;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    uint32_t minus_input_zero_point;
    int32_t multiplier;
    int32_t bias;
  } armv6simd;
  struct {
    uint16_t input_zero_point;
    int16_t multiplier;
    int16_t output_zero_point;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) uint16_t multiplier[8];
    XNN_ALIGN(16) int32_t bias[4];
  } sse2;
  struct {
    XNN_ALIGN(16) uint16_t input_zero_point[8];
    XNN_ALIGN(16) int16_t multiplier[8];
    XNN_ALIGN(16) int16_t output_zero_point[8];
  } ssse3;
  struct {
    XNN_ALIGN(32) uint16_t input_zero_point[16];
    XNN_ALIGN(32) int16_t multiplier[16];
    XNN_ALIGN(32) int16_t output_zero_point[16];
  } avx2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) uint16_t input_zero_point[4];
    XNN_ALIGN(8) int16_t multiplier[4];
    XNN_ALIGN(8) int16_t output_zero_point[4];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};

// Parameter block for the QU8 -> F32 conversion microkernels (going by the
// union name). Unlike the QS8 counterpart, the sse2 flavour has no sign_mask.
union xnn_qu8_f32_cvt_params {
  struct {
    int32_t zero_point;
    float scale;
  } scalar;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    int16_t minus_zero_point[2];
    float scale;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) uint16_t magic_exp[8];
    XNN_ALIGN(16) float magic_bias[4];
    XNN_ALIGN(16) float scale[4];
  } sse2;
  struct {
    XNN_ALIGN(16) int32_t minus_zero_point[4];
    XNN_ALIGN(16) float scale[4];
  } sse4;
  struct {
    XNN_ALIGN(32) int32_t minus_zero_point[8];
    XNN_ALIGN(32) float scale[8];
  } avx;
  struct {
    XNN_ALIGN(64) int32_t minus_zero_point[16];
    XNN_ALIGN(64) float scale[16];
  } avx512;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) int16_t minus_zero_point[4];
    XNN_ALIGN(8) float scale[2];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};


// ELU: used by VELU microkernels.

// F16 ELU parameters. There is no scalar flavour, so the dummy member keeps
// the union non-empty on targets where neither #if branch is compiled.
union xnn_f16_elu_params {
  char _;  // Dummy member variable to comply with the C standard
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Constants are held as raw 16-bit values in this flavour.
  struct {
    uint16_t prescale;
    uint16_t sat_cutoff;
    uint16_t magic_bias;
    uint16_t log2e;
    uint16_t minus_ln2;
    uint16_t c3;
    uint16_t c2;
    uint16_t minus_alpha;
    uint16_t beta;
  } neonfp16arith_rr1_p3;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Constants are held as float arrays in this flavour.
  struct {
    XNN_ALIGN(32) float prescale[8];
    XNN_ALIGN(32) float sat_cutoff[8];
    XNN_ALIGN(32) float magic_bias[8];
    XNN_ALIGN(32) float log2e[8];
    XNN_ALIGN(32) float minus_ln2[8];
    XNN_ALIGN(32) float c3[8];
    XNN_ALIGN(32) float c2[8];
    XNN_ALIGN(32) float c1[8];
    XNN_ALIGN(32) float alpha[8];
    XNN_ALIGN(32) float beta[8];
  } avx2_rr1_p3;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// F32 ELU parameters. Member names encode the kernel variant
// (e.g. rr1/rr2, lutN, pN); the field sets differ between variants, so each
// struct must be filled by the initializer written for that exact variant.
union xnn_f32_elu_params {
  struct {
    float prescale;
    float alpha;
    float beta;
    float sat_cutoff;
    float magic_bias;
    float log2e;
    float minus_ln2_hi;
    float minus_ln2_lo;
    float c3;
    float c2;
    float one;
  } scalar_rr2_lut16_p3;
  struct {
    float prescale;
    float alpha;
    float beta;
    float sat_cutoff;
    float magic_bias;
    float log2e;
    float minus_ln2_hi;
    float minus_ln2_lo;
    float c6;
    float c5;
    float c4;
    float c3;
    float c2;
    float one;
  } scalar_rr2_p6;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    float prescale;
    float alpha;
    float beta;
    float sat_cutoff;
    float magic_bias;
    float log2e;
    float minus_ln2_hi;
    float minus_ln2_lo;
    float c6;
    float c5;
    float c4;
    float c3;
    float c2;
  } neon_rr2_p6;
  struct {
    float prescale;
    float alpha;
    float beta;
    float sat_cutoff;
    float magic_bias;
    float log2e;
    float minus_ln2_hi;
    float minus_ln2_lo;
    float c3;
    float c2;
  } neon_rr2_lut16_p3;
  struct {
    float prescale;
    float alpha;
    float beta;
    float sat_cutoff;
    float magic_bias;
    float log2e;
    float minus_ln2;
    float c6;
    float c5;
    float c4;
    float c3;
    float c2;
  } neonfma_rr1_p6;
  struct {
    float prescale;
    float alpha;
    float beta;
    float sat_cutoff;
    float magic_bias;
    float log2e;
    float minus_ln2;
    float c3;
    float c2;
  } neonfma_rr1_lut16_p3;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float prescale[4];
    XNN_ALIGN(16) float alpha[4];
    XNN_ALIGN(16) float beta[4];
    XNN_ALIGN(16) float sat_cutoff[4];
    XNN_ALIGN(16) float magic_bias[4];
    XNN_ALIGN(16) float log2e[4];
    XNN_ALIGN(16) uint32_t index_mask[4];
    XNN_ALIGN(16) float minus_ln2_hi[4];
    XNN_ALIGN(16) float minus_ln2_lo[4];
    XNN_ALIGN(16) float c3[4];
    XNN_ALIGN(16) float c2[4];
    XNN_ALIGN(16) float one[4];
  } sse2_rr2_lut16_p3;
  struct {
    XNN_ALIGN(16) float prescale[4];
    XNN_ALIGN(16) float alpha[4];
    XNN_ALIGN(16) float beta[4];
    XNN_ALIGN(16) float sat_cutoff[4];
    XNN_ALIGN(16) float magic_bias[4];
    XNN_ALIGN(16) float log2e[4];
    XNN_ALIGN(16) float minus_ln2_hi[4];
    XNN_ALIGN(16) float minus_ln2_lo[4];
    XNN_ALIGN(16) float c6[4];
    XNN_ALIGN(16) float c5[4];
    XNN_ALIGN(16) float c4[4];
    XNN_ALIGN(16) float c3[4];
    XNN_ALIGN(16) float c2[4];
    XNN_ALIGN(16) float one[4];
  } sse2_rr2_p6;
  struct {
    XNN_ALIGN(32) float prescale[8];
    XNN_ALIGN(32) float alpha[8];
    XNN_ALIGN(32) float beta[8];
    XNN_ALIGN(32) float sat_cutoff[8];
    XNN_ALIGN(32) float magic_bias[8];
    XNN_ALIGN(32) float log2e[8];
    XNN_ALIGN(32) uint32_t index_mask[8];
    XNN_ALIGN(32) float minus_ln2_hi[8];
    XNN_ALIGN(32) float minus_ln2_lo[8];
    XNN_ALIGN(32) float c3[8];
    XNN_ALIGN(32) float c2[8];
    XNN_ALIGN(32) float one[8];
    int32_t mask_table[14];
  } avx_rr2_lut16_p3;
  struct {
    XNN_ALIGN(32) float prescale[8];
    XNN_ALIGN(32) float alpha[8];
    XNN_ALIGN(32) float beta[8];
    XNN_ALIGN(32) float sat_cutoff[8];
    XNN_ALIGN(32) float magic_bias[8];
    XNN_ALIGN(32) float log2e[8];
    XNN_ALIGN(32) uint32_t index_mask[8];
    XNN_ALIGN(32) float table[8];
    XNN_ALIGN(32) float minus_ln2_hi[8];
    XNN_ALIGN(32) float minus_ln2_lo[8];
    XNN_ALIGN(32) float c4[8];
    XNN_ALIGN(32) float c3[8];
    XNN_ALIGN(32) float c2[8];
    XNN_ALIGN(32) float one[8];
    int32_t mask_table[14];
  } avx_rr2_lut4_p4;
  struct {
    XNN_ALIGN(32) float prescale[8];
    XNN_ALIGN(32) float alpha[8];
    XNN_ALIGN(32) float beta[8];
    XNN_ALIGN(32) float sat_cutoff[8];
    XNN_ALIGN(32) float magic_bias[8];
    XNN_ALIGN(32) float log2e[8];
    XNN_ALIGN(32) float minus_ln2_hi[8];
    XNN_ALIGN(32) float minus_ln2_lo[8];
    XNN_ALIGN(32) float c6[8];
    XNN_ALIGN(32) float c5[8];
    XNN_ALIGN(32) float c4[8];
    XNN_ALIGN(32) float c3[8];
    XNN_ALIGN(32) float c2[8];
    XNN_ALIGN(32) float one[8];
    int32_t mask_table[14];
  } avx_rr2_p6;
  struct {
    XNN_ALIGN(32) float prescale[8];
    XNN_ALIGN(32) float alpha[8];
    XNN_ALIGN(32) float beta[8];
    XNN_ALIGN(32) float sat_cutoff[8];
    XNN_ALIGN(32) float magic_bias[8];
    XNN_ALIGN(32) float log2e[8];
    XNN_ALIGN(32) uint32_t index_mask[8];
    XNN_ALIGN(32) float minus_ln2[8];
    XNN_ALIGN(32) float c3[8];
    XNN_ALIGN(32) float c2[8];
    int32_t mask_table[14];
  } avx2_rr1_lut16_p3;
  // The lookup table is stored as uint32_t in this variant.
  struct {
    XNN_ALIGN(32) float prescale[8];
    XNN_ALIGN(32) float alpha[8];
    XNN_ALIGN(32) float beta[8];
    XNN_ALIGN(32) float sat_cutoff[8];
    XNN_ALIGN(32) float magic_bias[8];
    XNN_ALIGN(32) float log2e[8];
    XNN_ALIGN(32) uint32_t table[8];
    XNN_ALIGN(32) float minus_ln2[8];
    XNN_ALIGN(32) float c4[8];
    XNN_ALIGN(32) float c3[8];
    XNN_ALIGN(32) float c2[8];
    int32_t mask_table[14];
  } avx2_rr1_lut8_p4;
  // The lookup table is stored as float in this variant.
  struct {
    XNN_ALIGN(32) float prescale[8];
    XNN_ALIGN(32) float alpha[8];
    XNN_ALIGN(32) float beta[8];
    XNN_ALIGN(32) float sat_cutoff[8];
    XNN_ALIGN(32) float magic_bias[8];
    XNN_ALIGN(32) float log2e[8];
    XNN_ALIGN(32) float table[8];
    XNN_ALIGN(32) float minus_ln2[8];
    XNN_ALIGN(32) float c4[8];
    XNN_ALIGN(32) float c3[8];
    XNN_ALIGN(32) float c2[8];
    int32_t mask_table[14];
  } avx2_rr1_lut4_p4;
  struct {
    XNN_ALIGN(32) float prescale[8];
    XNN_ALIGN(32) float alpha[8];
    XNN_ALIGN(32) float beta[8];
    XNN_ALIGN(32) float sat_cutoff[8];
    XNN_ALIGN(32) float magic_bias[8];
    XNN_ALIGN(32) float log2e[8];
    XNN_ALIGN(32) float minus_ln2[8];
    XNN_ALIGN(32) float c6[8];
    XNN_ALIGN(32) float c5[8];
    XNN_ALIGN(32) float c4[8];
    XNN_ALIGN(32) float c3[8];
    XNN_ALIGN(32) float c2[8];
    int32_t mask_table[14];
  } avx2_rr1_p6;
  // The AVX512 variants keep constants as single floats; only the table is an
  // aligned array.
  struct {
    float prescale;
    float alpha;
    float beta;
    float sat_cutoff;
    float magic_bias;
    float log2e;
    float minus_ln2;
    float c3;
    float c2;
    XNN_ALIGN(64) uint32_t table[16];
  } avx512_rr1_lut16_p3;
  struct {
    float prescale;
    float alpha;
    float beta;
    float sat_cutoff;
    float magic_bias;
    float log2e;
    float minus_ln2;
    float c6;
    float c5;
    float c4;
    float c3;
    float c2;
  } avx512_rr1_p6;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) float prescale[2];
    XNN_ALIGN(8) float alpha[2];
    XNN_ALIGN(8) float beta[2];
    XNN_ALIGN(8) float sat_cutoff[2];
    XNN_ALIGN(8) float magic_bias[2];
    XNN_ALIGN(8) float log2e[2];
    XNN_ALIGN(8) uint32_t index_mask[2];
    XNN_ALIGN(8) float minus_ln2_hi[2];
    XNN_ALIGN(8) float minus_ln2_lo[2];
    XNN_ALIGN(8) float c3[2];
    XNN_ALIGN(8) float c2[2];
    XNN_ALIGN(8) float one[2];
  } wasmsimd_rr2_lut16_p3;
  struct {
    XNN_ALIGN(8) float prescale[2];
    XNN_ALIGN(8) float alpha[2];
    XNN_ALIGN(8) float beta[2];
    XNN_ALIGN(8) float sat_cutoff[2];
    XNN_ALIGN(8) float magic_bias[2];
    XNN_ALIGN(8) float log2e[2];
    XNN_ALIGN(8) float minus_ln2_hi[2];
    XNN_ALIGN(8) float minus_ln2_lo[2];
    XNN_ALIGN(8) float c6[2];
    XNN_ALIGN(8) float c5[2];
    XNN_ALIGN(8) float c4[2];
    XNN_ALIGN(8) float c3[2];
    XNN_ALIGN(8) float c2[2];
    XNN_ALIGN(8) float one[2];
  } wasmsimd_rr2_p6;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};


// ExpMinus: used by RADDEXPMINUSMAX microkernels.

// F16 ExpMinus parameters. There is no scalar flavour, so the dummy member
// keeps the union non-empty on targets where neither #if branch is compiled.
union xnn_f16_expminus_params {
  char _;  // Dummy member variable to comply with the C standard
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    uint16_t magic_bias;
    uint16_t log2e;
    uint16_t minus_ln2_hi;
    uint16_t minus_ln2_lo;
    uint16_t c2;
    uint16_t c1;
    uint16_t denorm_cutoff;
  } neonfp16arith_rr2_p2;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(32) float magic_bias[8];
    XNN_ALIGN(32) float log2e[8];
    XNN_ALIGN(32) float minus_ln2[8];
    XNN_ALIGN(32) float c2[8];
    XNN_ALIGN(32) float c1[8];
    XNN_ALIGN(32) float denorm_cutoff[8];
  } avx2_rr1_p2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// F32 ExpMinus parameters, one struct per kernel variant.
union xnn_f32_expminus_params {
  struct {
    float log2e;
    float magic_bias;
    float minus_ln2_hi;
    float minus_ln2_lo;
    float c5;
    float c4;
    float c3;
    float c2;
    float c1;
    float denorm_cutoff;
  } scalar_rr2_p5;
  struct {
    float log2e;
    float magic_bias;
    float minus_ln2_hi;
    float minus_ln2_lo;
    float c2;
    float denorm_cutoff;
  } scalar_rr2_lut64_p2;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    float log2e;
    float magic_bias;
    float minus_ln2_hi;
    float minus_ln2_lo;
    float c5;
    float c4;
    float c3;
    float c2;
    float c1;
    float denorm_cutoff;
  } neon_rr2_p5;
  struct {
    float log2e;
    float magic_bias;
    float minus_ln2_hi;
    float minus_ln2_lo;
    float c2;
    float denorm_cutoff;
  } neon_rr2_lut64_p2;
  struct {
    float log2e;
    float magic_bias;
    float minus_ln2;
    float c5;
    float c4;
    float c3;
    float c2;
    float c1;
    float denorm_cutoff;
  } neonfma_rr1_p5;
  struct {
    float log2e;
    float magic_bias;
    float minus_ln2;
    float c2;
    float denorm_cutoff;
  } neonfma_rr1_lut64_p2;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float log2e[4];
    XNN_ALIGN(16) float magic_bias[4];
    XNN_ALIGN(16) float minus_ln2_hi[4];
    XNN_ALIGN(16) float minus_ln2_lo[4];
    XNN_ALIGN(16) float c5[4];
    XNN_ALIGN(16) float c4[4];
    XNN_ALIGN(16) float c3[4];
    XNN_ALIGN(16) float c2[4];
    XNN_ALIGN(16) float c1[4];
    XNN_ALIGN(16) float denorm_cutoff[4];
  } sse2_rr2_p5;
  struct {
    XNN_ALIGN(32) float log2e[8];
    XNN_ALIGN(32) float magic_bias[8];
    XNN_ALIGN(32) float minus_ln2[8];
    XNN_ALIGN(32) float c5[8];
    XNN_ALIGN(32) float c4[8];
    XNN_ALIGN(32) float c3[8];
    XNN_ALIGN(32) float c2[8];
    XNN_ALIGN(32) float c1[8];
    XNN_ALIGN(32) float denorm_cutoff[8];
    int32_t mask_table[14];
  } avx2_rr1_p5;
  // This variant has no magic_bias or denorm_cutoff and adds a c0 term.
  struct {
    float log2e;
    float minus_ln2;
    float c5;
    float c4;
    float c3;
    float c2;
    float c1;
    float c0;
  } avx512_rr1_p5;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) float log2e[2];
    XNN_ALIGN(8) float magic_bias[2];
    XNN_ALIGN(8) float minus_ln2_hi[2];
    XNN_ALIGN(8) float minus_ln2_lo[2];
    XNN_ALIGN(8) float c5[2];
    XNN_ALIGN(8) float c4[2];
    XNN_ALIGN(8) float c3[2];
    XNN_ALIGN(8) float c2[2];
    XNN_ALIGN(8) float c1[2];
    XNN_ALIGN(8) float denorm_cutoff[2];
  } wasmsimd_rr2_p5;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};


// HSwish: used by VHSWISH microkernels.

// F16 HSwish parameters. Every real member is architecture-gated, so without
// the dummy member the union would be empty (not valid standard C) on any
// target that is neither ARM nor x86. The dummy member does not change the
// size, alignment or member offsets on targets where a real member exists.
union xnn_f16_hswish_params {
  char _;  // Dummy member variable to comply with the C standard
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    uint16_t sixth;
    uint16_t three;
    uint16_t six;
    uint16_t pad;  // pad to 8 bytes for neonfp16arith assembly.
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // sixth/three are float arrays while six is an array of 16-bit values.
  struct {
    XNN_ALIGN(32) float sixth[8];
    XNN_ALIGN(32) float three[8];
    XNN_ALIGN(16) uint16_t six[8];
  } avx;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// F32 HSwish parameters. The scalar and wasmsimd flavours carry
// sixth/three/six, whereas the x86 flavours carry sixth/half/one.
union xnn_f32_hswish_params {
  struct {
    float sixth;
    float three;
    float six;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float sixth[4];
    XNN_ALIGN(16) float half[4];
    XNN_ALIGN(16) float one[4];
  } sse;
  struct {
    XNN_ALIGN(32) float sixth[8];
    XNN_ALIGN(32) float half[8];
    XNN_ALIGN(32) float one[8];
    int32_t mask_table[14];
  } avx;
  struct {
    float sixth;
    float half;
    float one;
  } avx512;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) float sixth[2];
    XNN_ALIGN(8) float three[2];
    XNN_ALIGN(8) float six[2];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};


// LReLU (Leaky ReLU): used by VLRELU microkernels.

// F16 LReLU parameters. Every real member is architecture-gated, so without
// the dummy member the union would be empty (not valid standard C) on any
// target that is neither ARM nor x86. The dummy member does not change the
// size, alignment or member offsets on targets where a real member exists.
union xnn_f16_lrelu_params {
  char _;  // Dummy member variable to comply with the C standard
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    uint16_t slope;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(32) float slope[8];
  } avx;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// F32 LReLU parameters: a single slope, laid out per kernel flavour.
union xnn_f32_lrelu_params {
  struct {
    float slope;
  } scalar;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float slope[4];
  } sse;
  struct {
    XNN_ALIGN(32) float slope[8];
    int32_t mask_table[14];
  } avx;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) float slope[2];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};

// QS8 LReLU parameters. Two parameterisations appear: positive/negative
// multipliers, or multiplier_diff/multiplier_base.
union xnn_qs8_lrelu_params {
  struct {
    int32_t input_zero_point;
    int32_t positive_multiplier;
    int32_t negative_multiplier;
    int32_t bias;
  } scalar_select;
  // Field order here is diff-then-base; it differs from the QU8 union.
  struct {
    int32_t input_zero_point;
    int32_t multiplier_diff;
    int32_t multiplier_base;
    int32_t bias;
  } scalar_andxor;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    uint32_t input_zero_point;
    uint32_t positive_multiplier;
    uint32_t negative_multiplier;
    int32_t bias;
  } armv6simd;
  struct {
    int16_t input_zero_point;
    int16_t positive_multiplier;
    int16_t negative_multiplier;
    int16_t output_zero_point;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) int16_t input_zero_point[8];
    XNN_ALIGN(16) int16_t multiplier_diff[8];
    XNN_ALIGN(16) int16_t multiplier_base[8];
    XNN_ALIGN(16) int16_t output_zero_point[8];
  } sse2;
  struct {
    XNN_ALIGN(16) int16_t input_zero_point[8];
    XNN_ALIGN(16) int16_t positive_multiplier[8];
    XNN_ALIGN(16) int16_t negative_multiplier[8];
    XNN_ALIGN(16) int16_t output_zero_point[8];
  } avx;
  struct {
    XNN_ALIGN(32) int16_t input_zero_point[16];
    XNN_ALIGN(32) int16_t positive_multiplier[16];
    XNN_ALIGN(32) int16_t negative_multiplier[16];
    XNN_ALIGN(32) int16_t output_zero_point[16];
  } avx2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) int16_t input_zero_point[4];
    XNN_ALIGN(8) int16_t positive_multiplier[4];
    XNN_ALIGN(8) int16_t negative_multiplier[4];
    XNN_ALIGN(8) int16_t output_zero_point[4];
  } wasmsimd_arm;
  struct {
    XNN_ALIGN(8) int16_t input_zero_point[4];
    XNN_ALIGN(8) int16_t multiplier_diff[4];
    XNN_ALIGN(8) int16_t multiplier_base[4];
    XNN_ALIGN(8) int16_t output_zero_point[4];
  } wasmsimd_x86;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};

// QU8 LReLU parameters. Same flavours as the QS8 union; the neon
// input_zero_point is unsigned here.
union xnn_qu8_lrelu_params {
  struct {
    int32_t input_zero_point;
    int32_t positive_multiplier;
    int32_t negative_multiplier;
    int32_t bias;
  } scalar_select;
  // Field order here is base-then-diff; it differs from the QS8 union.
  struct {
    int32_t input_zero_point;
    int32_t multiplier_base;
    int32_t multiplier_diff;
    int32_t bias;
  } scalar_andxor;
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  struct {
    uint32_t input_zero_point;
    uint32_t positive_multiplier;
    uint32_t negative_multiplier;
    int32_t bias;
  } armv6simd;
  struct {
    uint16_t input_zero_point;
    int16_t positive_multiplier;
    int16_t negative_multiplier;
    int16_t output_zero_point;
  } neon;
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) int16_t input_zero_point[8];
    XNN_ALIGN(16) int16_t multiplier_diff[8];
    XNN_ALIGN(16) int16_t multiplier_base[8];
    XNN_ALIGN(16) int16_t output_zero_point[8];
  } sse2;
  struct {
    XNN_ALIGN(16) int16_t input_zero_point[8];
    XNN_ALIGN(16) int16_t positive_multiplier[8];
    XNN_ALIGN(16) int16_t negative_multiplier[8];
    XNN_ALIGN(16) int16_t output_zero_point[8];
  } avx;
  struct {
    XNN_ALIGN(32) int16_t input_zero_point[16];
    XNN_ALIGN(32) int16_t positive_multiplier[16];
    XNN_ALIGN(32) int16_t negative_multiplier[16];
    XNN_ALIGN(32) int16_t output_zero_point[16];
  } avx2;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) int16_t input_zero_point[4];
    XNN_ALIGN(8) int16_t positive_multiplier[4];
    XNN_ALIGN(8) int16_t negative_multiplier[4];
    XNN_ALIGN(8) int16_t output_zero_point[4];
  } wasmsimd_arm;
  struct {
    XNN_ALIGN(8) int16_t input_zero_point[4];
    XNN_ALIGN(8) int16_t multiplier_diff[4];
    XNN_ALIGN(8) int16_t multiplier_base[4];
    XNN_ALIGN(8) int16_t output_zero_point[4];
  } wasmsimd_x86;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};


// Neg: used by VNEG microkernels.

// F16 Neg parameters; only the x86 flavour carries data.
union xnn_f16_neg_params {
  char _;  // Dummy member variable to comply with the C standard
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) uint16_t sign_mask[8];
  } sse;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};

// F32 Neg parameters. The sign mask is typed float in the sse/avx/wasmsimd
// flavours and uint32_t in the avx512 flavour.
union xnn_f32_neg_params {
  char _;  // Dummy member variable to comply with the C standard
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float sign_mask[4];
  } sse;
  struct {
    XNN_ALIGN(32) float sign_mask[8];
    int32_t mask_table[14];
  } avx;
  struct {
    uint32_t sign_mask;
  } avx512;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  struct {
    XNN_ALIGN(8) float sign_mask[2];
  } wasmsimd;
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
};


// Rnd (Round): used by VRNDNE/VRNDU/VRNDD/VRNDZ microkernels.

// F16 Rnd parameters: no flavour carries data, only the placeholder member.
union xnn_f16_rnd_params {
  char _;  // Dummy member variable to comply with the C standard
};

// F32 Rnd parameters; only the x86 flavours carry data.
union xnn_f32_rnd_params {
  char _;  // Dummy member variable to comply with the C standard
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  struct {
    XNN_ALIGN(16) float sign_mask[4];
    XNN_ALIGN(16) float one[4];
  } sse2;
  struct {
    int32_t mask_table[14];
  } avx;
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
};


// Sigmoid: used by VSIGMOID microkernels.
+ +union xnn_f16_sigmoid_params { + char _; // Dummy member variable to comply with the C standard +#if XNN_ARCH_ARM || XNN_ARCH_ARM64 + struct { + uint16_t magic_bias; + uint16_t minus_log2e; + uint16_t ln2_hi; + uint16_t ln2_lo; + uint16_t c2; + uint16_t c1; + uint16_t denorm_cutoff; + } neonfp16arith_rr2_p2; +#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 + struct { + XNN_ALIGN(32) float sign_mask[8]; + XNN_ALIGN(32) float magic_bias[8]; + XNN_ALIGN(32) float log2e[8]; + XNN_ALIGN(32) float minus_ln2[8]; + XNN_ALIGN(32) float c2[8]; + XNN_ALIGN(32) float c1[8]; + XNN_ALIGN(32) float one[8]; + XNN_ALIGN(32) float denorm_cutoff[8]; + } avx2_rr1_p2; +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 +}; + +union xnn_f32_sigmoid_params { + struct { + float magic_bias; + float minus_log2e; + float ln2_hi; + float ln2_lo; + float c1; + float one; + float denorm_cutoff; + } scalar_rr2_lut2048_p1; + struct { + float magic_bias; + float minus_log2e; + float ln2_hi; + float ln2_lo; + float c2; + float one; + float denorm_cutoff; + } scalar_rr2_lut64_p2; + struct { + float magic_bias; + float minus_log2e; + float ln2_hi; + float ln2_lo; + float c5; + float c4; + float c3; + float c2; + float c1; + float one; + float denorm_cutoff; + } scalar_rr2_p5; +#if XNN_ARCH_ARM || XNN_ARCH_ARM64 + struct { + float magic_bias; + float minus_log2e; + float ln2_hi; + float ln2_lo; + float c1; + float denorm_cutoff; + } neon_rr2_lut2048_p1; + struct { + float magic_bias; + float minus_log2e; + float ln2_hi; + float ln2_lo; + float c2; + float denorm_cutoff; + } neon_rr2_lut64_p2; + struct { + float magic_bias; + float minus_log2e; + float ln2_hi; + float ln2_lo; + float c5; + float c4; + float c3; + float c2; + float c1; + float denorm_cutoff; + } neon_rr2_p5; + struct { + float magic_bias; + float minus_log2e; + float ln2; + float c1; + float denorm_cutoff; + } neonfma_rr1_lut2048_p1; + struct { + float magic_bias; + float minus_log2e; + float ln2; + float c2; + 
float denorm_cutoff; + } neonfma_rr1_lut64_p2; + struct { + float magic_bias; + float minus_log2e; + float ln2; + float c5; + float c4; + float c3; + float c2; + float c1; + float denorm_cutoff; + } neonfma_rr1_p5; +#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 + struct { + XNN_ALIGN(16) float sign_mask[4]; + XNN_ALIGN(16) float magic_bias[4]; + XNN_ALIGN(16) float log2e[4]; + XNN_ALIGN(16) uint32_t index_mask[4]; + XNN_ALIGN(16) float minus_ln2_hi[4]; + XNN_ALIGN(16) float minus_ln2_lo[4]; + XNN_ALIGN(16) float c2[4]; + XNN_ALIGN(16) float one[4]; + XNN_ALIGN(16) float denorm_cutoff[4]; + } sse2_rr2_lut64_p2; + struct { + XNN_ALIGN(16) float sign_mask[4]; + XNN_ALIGN(16) float magic_bias[4]; + XNN_ALIGN(16) float log2e[4]; + XNN_ALIGN(16) float minus_ln2_hi[4]; + XNN_ALIGN(16) float minus_ln2_lo[4]; + XNN_ALIGN(16) float c5[4]; + XNN_ALIGN(16) float c4[4]; + XNN_ALIGN(16) float c3[4]; + XNN_ALIGN(16) float c2[4]; + XNN_ALIGN(16) float c1[4]; + XNN_ALIGN(16) float one[4]; + XNN_ALIGN(16) float denorm_cutoff[4]; + } sse2_rr2_p5; + struct { + XNN_ALIGN(32) float sign_mask[8]; + XNN_ALIGN(32) float magic_bias[8]; + XNN_ALIGN(32) float log2e[8]; + XNN_ALIGN(32) float minus_ln2_hi[8]; + XNN_ALIGN(32) float minus_ln2_lo[8]; + XNN_ALIGN(32) float c5[8]; + XNN_ALIGN(32) float c4[8]; + XNN_ALIGN(32) float c3[8]; + XNN_ALIGN(32) float c2[8]; + XNN_ALIGN(32) float c1[8]; + XNN_ALIGN(32) float one[8]; + XNN_ALIGN(32) float two[8]; + XNN_ALIGN(32) float denorm_cutoff[8]; + int32_t mask_table[14]; + } avx_rr2_p5; + struct { + XNN_ALIGN(32) float sign_mask[8]; + XNN_ALIGN(32) float magic_bias[8]; + XNN_ALIGN(32) float log2e[8]; + XNN_ALIGN(32) float minus_ln2[8]; + XNN_ALIGN(32) float c5[8]; + XNN_ALIGN(32) float c4[8]; + XNN_ALIGN(32) float c3[8]; + XNN_ALIGN(32) float c2[8]; + XNN_ALIGN(32) float c1[8]; + XNN_ALIGN(32) float one[8]; + XNN_ALIGN(32) float denorm_cutoff[8]; + int32_t mask_table[14]; + } avx2_rr1_p5; + struct { + uint32_t sign_mask; 
+ float magic_bias; + float log2e; + float minus_ln2; + float c3; + float c2; + float one; + XNN_ALIGN(64) float table[16]; + } avx512_rr1_lut16_p3; + struct { + uint32_t sign_mask; + float magic_bias; + float log2e; + float minus_ln2_hi; + float minus_ln2_lo; + float c2; + float c1; + float one; + XNN_ALIGN(64) float table_lo[16]; + XNN_ALIGN(64) float table_hi[16]; + } avx512_rr2_lut32_p2; + struct { + uint32_t sign_mask; + float log2e; + float minus_ln2; + float c5; + float c4; + float c3; + float c2; + float c1; + float one; + } avx512_rr1_p5; +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 +#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD + struct { + XNN_ALIGN(8) float magic_bias[2]; + XNN_ALIGN(8) float minus_log2e[2]; + XNN_ALIGN(8) uint32_t index_mask[2]; + XNN_ALIGN(8) float ln2_hi[2]; + XNN_ALIGN(8) float ln2_lo[2]; + XNN_ALIGN(8) float c2[2]; + XNN_ALIGN(8) float one[2]; + XNN_ALIGN(8) float denorm_cutoff[2]; + } wasmsimd_rr2_lut64_p2; + struct { + XNN_ALIGN(8) float magic_bias[2]; + XNN_ALIGN(8) float minus_log2e[2]; + XNN_ALIGN(8) float ln2_hi[2]; + XNN_ALIGN(8) float ln2_lo[2]; + XNN_ALIGN(8) float c5[2]; + XNN_ALIGN(8) float c4[2]; + XNN_ALIGN(8) float c3[2]; + XNN_ALIGN(8) float c2[2]; + XNN_ALIGN(8) float c1[2]; + XNN_ALIGN(8) float one[2]; + XNN_ALIGN(8) float denorm_cutoff[2]; + } wasmsimd_rr2_p5; +#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD +}; + + +// Sqrt (Square Root): used by VSQRT microkernels. + +union xnn_f16_sqrt_params { + char _; // Dummy member variable to comply with the C standard +}; + +union xnn_f32_sqrt_params { + char _; // Dummy member variable to comply with the C standard +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 + struct { + int32_t mask_table[14]; + } avx; + struct { + XNN_ALIGN(32) float half[8]; + int32_t mask_table[14]; + } fma; + struct { + float half; + } avx512; +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 +}; + + +// CHW: used by CONV/DWCONV microkernels in CHW layout with Min+Max parameters. 
+ +union xnn_f16_chw_params { + char _; // Dummy member variable to comply with the C standard +#if XNN_ARCH_ARM || XNN_ARCH_ARM64 + struct { + uint16_t min; + uint16_t max; + XNN_ALIGN(8) uint16_t mask_even[4]; // used by stride 2 kernels + XNN_ALIGN(8) uint16_t mask_odd[4]; // used by stride 2 kernels + XNN_ALIGN(8) uint16_t mask[4]; // used by stride 1 kernels + XNN_ALIGN(16) uint16_t maskx8[8]; // used by stride 1 x8 kernels + } neonfp16arith; +#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 +}; + +union xnn_f32_chw_params { + struct { + XNN_ALIGN(16) int32_t mask_even[4]; // used by stride 2 kernels + XNN_ALIGN(16) int32_t mask_odd[4]; // used by stride 2 kernels + XNN_ALIGN(16) int32_t mask[4]; // used by stride 1 kernels + float min; + float max; + } scalar; +#if XNN_ARCH_ARM || XNN_ARCH_ARM64 + struct { + float min; + float max; + XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels + XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels + XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels + } neon; +#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 + struct { + XNN_ALIGN(16) float min[4]; + XNN_ALIGN(16) float max[4]; + XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels + XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels + XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels + } sse; +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 +}; + + +// GAvgPool (Global Average Pool): used by GAVGPOOL microkernels in CHW layout with Scale+Min+Max parameters. 
+ +union xnn_f16_gavgpool_params { + char _; // Dummy member variable to comply with the C standard +#if XNN_ARCH_ARM || XNN_ARCH_ARM64 + struct { + XNN_ALIGN(16) uint16_t mask[8]; + uint16_t multiplier; + uint16_t output_min; + uint16_t output_max; + } neonfp16arith; +#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 */ +}; + +union xnn_f32_gavgpool_params { + struct { + XNN_ALIGN(16) int32_t mask[4]; + float multiplier; + float output_min; + float output_max; + } scalar; +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 + struct { + XNN_ALIGN(16) float multiplier[4]; + XNN_ALIGN(16) float output_min[4]; + XNN_ALIGN(16) float output_max[4]; + XNN_ALIGN(16) uint32_t mask[4]; + } sse; +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 +#if XNN_ARCH_ARM || XNN_ARCH_ARM64 + struct { + XNN_ALIGN(16) uint32_t mask[4]; + float multiplier; + float output_min; + float output_max; + } neon; +#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 */ +}; + +// Forward declare for use in microkernel headers for JIT generator functions. +struct xnn_code_buffer; + +// JIT GEMM: used by GEMM/IGEMM microkernel generators. + +struct jit_gemm_params { + struct { + float min; + float max; + } f32_minmax; +}; diff --git a/src/xnnpack/pad.h b/src/xnnpack/pad.h index 835c38cc4..c4ea02578 100644 --- a/src/xnnpack/pad.h +++ b/src/xnnpack/pad.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h index 5216d2ca9..a596b9b24 100644 --- a/src/xnnpack/params.h +++ b/src/xnnpack/params.h @@ -14,2413 +14,9 @@ #include <xnnpack.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> -union xnn_f16_default_params { - // Empty; serves to differentiate pointer types for micro-kernels without fused activation. - char _; // Dummy member variable to comply with the C standard -}; - -// scaleminmax is used for avgpool ukernels. 
-union xnn_f16_scaleminmax_params { - // Empty; serves to differentiate pointer types for micro-kernels without fused activation. - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint16_t scale; - uint16_t min; - uint16_t max; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(32) float scale[8]; - XNN_ALIGN(32) float min[8]; - XNN_ALIGN(32) float max[8]; - } avx; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f16_minmax_params { - // Empty; serves to differentiate pointer types for micro-kernels without fused activation. - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint16_t min; - uint16_t max; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(32) float min[8]; - XNN_ALIGN(32) float max[8]; - } avx; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f32_default_params { - // Empty; serves to differentiate pointer types for micro-kernels without fused activation. - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - int32_t mask_table[14]; - } avx; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f32_relu_params { - // Empty; serves to differentiate pointer types for micro-kernels with different fused activations. 
- char _; // Dummy member variable to comply with the C standard -}; - -union xnn_f32_minmax_params { - struct { - float min; - float max; - } scalar; -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float min[4]; - XNN_ALIGN(16) float max[4]; - } sse; - struct { - XNN_ALIGN(32) float min[8]; - XNN_ALIGN(32) float max[8]; - int32_t mask_table[14]; - } avx; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float min[2]; - XNN_ALIGN(8) float max[2]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f32_abs_params { - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float nonsign_mask[4]; - } sse; - struct { - XNN_ALIGN(32) float nonsign_mask[8]; - int32_t mask_table[14]; - } avx; - struct { - uint32_t nonsign_mask; - } avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float nonsign_mask[2]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f32_neg_params { - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float sign_mask[4]; - } sse; - struct { - XNN_ALIGN(32) float sign_mask[8]; - int32_t mask_table[14]; - } avx; - struct { - uint32_t sign_mask; - } avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float sign_mask[2]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f16_abs_params { - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) uint16_t nonsign_mask[8]; - } sse; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f16_neg_params { - char _; // Dummy member variable to comply 
with the C standard -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) uint16_t sign_mask[8]; - } sse; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f16_rnd_params { - char _; // Dummy member variable to comply with the C standard -}; - -union xnn_f32_rnd_params { - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float sign_mask[4]; - XNN_ALIGN(16) float one[4]; - } sse2; - struct { - int32_t mask_table[14]; - } avx; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f16_elu_params { - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint16_t prescale; - uint16_t sat_cutoff; - uint16_t magic_bias; - uint16_t log2e; - uint16_t minus_ln2; - uint16_t c3; - uint16_t c2; - uint16_t minus_alpha; - uint16_t beta; - } neonfp16arith_rr1_p3; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(32) float prescale[8]; - XNN_ALIGN(32) float sat_cutoff[8]; - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) float minus_ln2[8]; - XNN_ALIGN(32) float c3[8]; - XNN_ALIGN(32) float c2[8]; - XNN_ALIGN(32) float c1[8]; - XNN_ALIGN(32) float alpha[8]; - XNN_ALIGN(32) float beta[8]; - } avx2_rr1_p3; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f32_elu_params { - struct { - float prescale; - float alpha; - float beta; - float sat_cutoff; - float magic_bias; - float log2e; - float minus_ln2_hi; - float minus_ln2_lo; - float c3; - float c2; - float one; - } scalar_rr2_lut16_p3; - struct { - float prescale; - float alpha; - float beta; - float sat_cutoff; - float magic_bias; - float log2e; - float minus_ln2_hi; - float minus_ln2_lo; - float c6; - float c5; - float c4; - float c3; - float c2; - float one; - } scalar_rr2_p6; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - float prescale; - float alpha; - float beta; - 
float sat_cutoff; - float magic_bias; - float log2e; - float minus_ln2_hi; - float minus_ln2_lo; - float c6; - float c5; - float c4; - float c3; - float c2; - } neon_rr2_p6; - struct { - float prescale; - float alpha; - float beta; - float sat_cutoff; - float magic_bias; - float log2e; - float minus_ln2_hi; - float minus_ln2_lo; - float c3; - float c2; - } neon_rr2_lut16_p3; - struct { - float prescale; - float alpha; - float beta; - float sat_cutoff; - float magic_bias; - float log2e; - float minus_ln2; - float c6; - float c5; - float c4; - float c3; - float c2; - } neonfma_rr1_p6; - struct { - float prescale; - float alpha; - float beta; - float sat_cutoff; - float magic_bias; - float log2e; - float minus_ln2; - float c3; - float c2; - } neonfma_rr1_lut16_p3; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float prescale[4]; - XNN_ALIGN(16) float alpha[4]; - XNN_ALIGN(16) float beta[4]; - XNN_ALIGN(16) float sat_cutoff[4]; - XNN_ALIGN(16) float magic_bias[4]; - XNN_ALIGN(16) float log2e[4]; - XNN_ALIGN(16) uint32_t index_mask[4]; - XNN_ALIGN(16) float minus_ln2_hi[4]; - XNN_ALIGN(16) float minus_ln2_lo[4]; - XNN_ALIGN(16) float c3[4]; - XNN_ALIGN(16) float c2[4]; - XNN_ALIGN(16) float one[4]; - } sse2_rr2_lut16_p3; - struct { - XNN_ALIGN(16) float prescale[4]; - XNN_ALIGN(16) float alpha[4]; - XNN_ALIGN(16) float beta[4]; - XNN_ALIGN(16) float sat_cutoff[4]; - XNN_ALIGN(16) float magic_bias[4]; - XNN_ALIGN(16) float log2e[4]; - XNN_ALIGN(16) float minus_ln2_hi[4]; - XNN_ALIGN(16) float minus_ln2_lo[4]; - XNN_ALIGN(16) float c6[4]; - XNN_ALIGN(16) float c5[4]; - XNN_ALIGN(16) float c4[4]; - XNN_ALIGN(16) float c3[4]; - XNN_ALIGN(16) float c2[4]; - XNN_ALIGN(16) float one[4]; - } sse2_rr2_p6; - struct { - XNN_ALIGN(32) float prescale[8]; - XNN_ALIGN(32) float alpha[8]; - XNN_ALIGN(32) float beta[8]; - XNN_ALIGN(32) float sat_cutoff[8]; - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float log2e[8]; - 
XNN_ALIGN(32) uint32_t index_mask[8]; - XNN_ALIGN(32) float minus_ln2_hi[8]; - XNN_ALIGN(32) float minus_ln2_lo[8]; - XNN_ALIGN(32) float c3[8]; - XNN_ALIGN(32) float c2[8]; - XNN_ALIGN(32) float one[8]; - int32_t mask_table[14]; - } avx_rr2_lut16_p3; - struct { - XNN_ALIGN(32) float prescale[8]; - XNN_ALIGN(32) float alpha[8]; - XNN_ALIGN(32) float beta[8]; - XNN_ALIGN(32) float sat_cutoff[8]; - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) uint32_t index_mask[8]; - XNN_ALIGN(32) float table[8]; - XNN_ALIGN(32) float minus_ln2_hi[8]; - XNN_ALIGN(32) float minus_ln2_lo[8]; - XNN_ALIGN(32) float c4[8]; - XNN_ALIGN(32) float c3[8]; - XNN_ALIGN(32) float c2[8]; - XNN_ALIGN(32) float one[8]; - int32_t mask_table[14]; - } avx_rr2_lut4_p4; - struct { - XNN_ALIGN(32) float prescale[8]; - XNN_ALIGN(32) float alpha[8]; - XNN_ALIGN(32) float beta[8]; - XNN_ALIGN(32) float sat_cutoff[8]; - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) float minus_ln2_hi[8]; - XNN_ALIGN(32) float minus_ln2_lo[8]; - XNN_ALIGN(32) float c6[8]; - XNN_ALIGN(32) float c5[8]; - XNN_ALIGN(32) float c4[8]; - XNN_ALIGN(32) float c3[8]; - XNN_ALIGN(32) float c2[8]; - XNN_ALIGN(32) float one[8]; - int32_t mask_table[14]; - } avx_rr2_p6; - struct { - XNN_ALIGN(32) float prescale[8]; - XNN_ALIGN(32) float alpha[8]; - XNN_ALIGN(32) float beta[8]; - XNN_ALIGN(32) float sat_cutoff[8]; - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) uint32_t index_mask[8]; - XNN_ALIGN(32) float minus_ln2[8]; - XNN_ALIGN(32) float c3[8]; - XNN_ALIGN(32) float c2[8]; - int32_t mask_table[14]; - } avx2_rr1_lut16_p3; - struct { - XNN_ALIGN(32) float prescale[8]; - XNN_ALIGN(32) float alpha[8]; - XNN_ALIGN(32) float beta[8]; - XNN_ALIGN(32) float sat_cutoff[8]; - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) uint32_t table[8]; - XNN_ALIGN(32) float minus_ln2[8]; - XNN_ALIGN(32) float 
c4[8]; - XNN_ALIGN(32) float c3[8]; - XNN_ALIGN(32) float c2[8]; - int32_t mask_table[14]; - } avx2_rr1_lut8_p4; - struct { - XNN_ALIGN(32) float prescale[8]; - XNN_ALIGN(32) float alpha[8]; - XNN_ALIGN(32) float beta[8]; - XNN_ALIGN(32) float sat_cutoff[8]; - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) float table[8]; - XNN_ALIGN(32) float minus_ln2[8]; - XNN_ALIGN(32) float c4[8]; - XNN_ALIGN(32) float c3[8]; - XNN_ALIGN(32) float c2[8]; - int32_t mask_table[14]; - } avx2_rr1_lut4_p4; - struct { - XNN_ALIGN(32) float prescale[8]; - XNN_ALIGN(32) float alpha[8]; - XNN_ALIGN(32) float beta[8]; - XNN_ALIGN(32) float sat_cutoff[8]; - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) float minus_ln2[8]; - XNN_ALIGN(32) float c6[8]; - XNN_ALIGN(32) float c5[8]; - XNN_ALIGN(32) float c4[8]; - XNN_ALIGN(32) float c3[8]; - XNN_ALIGN(32) float c2[8]; - int32_t mask_table[14]; - } avx2_rr1_p6; - struct { - float prescale; - float alpha; - float beta; - float sat_cutoff; - float magic_bias; - float log2e; - float minus_ln2; - float c3; - float c2; - XNN_ALIGN(64) uint32_t table[16]; - } avx512_rr1_lut16_p3; - struct { - float prescale; - float alpha; - float beta; - float sat_cutoff; - float magic_bias; - float log2e; - float minus_ln2; - float c6; - float c5; - float c4; - float c3; - float c2; - } avx512_rr1_p6; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float prescale[2]; - XNN_ALIGN(8) float alpha[2]; - XNN_ALIGN(8) float beta[2]; - XNN_ALIGN(8) float sat_cutoff[2]; - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) float log2e[2]; - XNN_ALIGN(8) uint32_t index_mask[2]; - XNN_ALIGN(8) float minus_ln2_hi[2]; - XNN_ALIGN(8) float minus_ln2_lo[2]; - XNN_ALIGN(8) float c3[2]; - XNN_ALIGN(8) float c2[2]; - XNN_ALIGN(8) float one[2]; - } wasmsimd_rr2_lut16_p3; - struct { - XNN_ALIGN(8) float prescale[2]; - XNN_ALIGN(8) float 
alpha[2]; - XNN_ALIGN(8) float beta[2]; - XNN_ALIGN(8) float sat_cutoff[2]; - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) float log2e[2]; - XNN_ALIGN(8) float minus_ln2_hi[2]; - XNN_ALIGN(8) float minus_ln2_lo[2]; - XNN_ALIGN(8) float c6[2]; - XNN_ALIGN(8) float c5[2]; - XNN_ALIGN(8) float c4[2]; - XNN_ALIGN(8) float c3[2]; - XNN_ALIGN(8) float c2[2]; - XNN_ALIGN(8) float one[2]; - } wasmsimd_rr2_p6; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f16_expminus_params { - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint16_t magic_bias; - uint16_t log2e; - uint16_t minus_ln2_hi; - uint16_t minus_ln2_lo; - uint16_t c2; - uint16_t c1; - uint16_t denorm_cutoff; - } neonfp16arith_rr2_p2; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) float minus_ln2[8]; - XNN_ALIGN(32) float c2[8]; - XNN_ALIGN(32) float c1[8]; - XNN_ALIGN(32) float denorm_cutoff[8]; - } avx2_rr1_p2; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f32_expminus_params { - struct { - float log2e; - float magic_bias; - float minus_ln2_hi; - float minus_ln2_lo; - float c5; - float c4; - float c3; - float c2; - float c1; - float denorm_cutoff; - } scalar_rr2_p5; - struct { - float log2e; - float magic_bias; - float minus_ln2_hi; - float minus_ln2_lo; - float c2; - float denorm_cutoff; - } scalar_rr2_lut64_p2; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - float log2e; - float magic_bias; - float minus_ln2_hi; - float minus_ln2_lo; - float c5; - float c4; - float c3; - float c2; - float c1; - float denorm_cutoff; - } neon_rr2_p5; - struct { - float log2e; - float magic_bias; - float minus_ln2_hi; - float minus_ln2_lo; - float c2; - float denorm_cutoff; - } neon_rr2_lut64_p2; - struct { - float log2e; - float magic_bias; - float minus_ln2; - float c5; - float c4; - float c3; 
- float c2; - float c1; - float denorm_cutoff; - } neonfma_rr1_p5; - struct { - float log2e; - float magic_bias; - float minus_ln2; - float c2; - float denorm_cutoff; - } neonfma_rr1_lut64_p2; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float log2e[4]; - XNN_ALIGN(16) float magic_bias[4]; - XNN_ALIGN(16) float minus_ln2_hi[4]; - XNN_ALIGN(16) float minus_ln2_lo[4]; - XNN_ALIGN(16) float c5[4]; - XNN_ALIGN(16) float c4[4]; - XNN_ALIGN(16) float c3[4]; - XNN_ALIGN(16) float c2[4]; - XNN_ALIGN(16) float c1[4]; - XNN_ALIGN(16) float denorm_cutoff[4]; - } sse2_rr2_p5; - struct { - XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float minus_ln2[8]; - XNN_ALIGN(32) float c5[8]; - XNN_ALIGN(32) float c4[8]; - XNN_ALIGN(32) float c3[8]; - XNN_ALIGN(32) float c2[8]; - XNN_ALIGN(32) float c1[8]; - XNN_ALIGN(32) float denorm_cutoff[8]; - int32_t mask_table[14]; - } avx2_rr1_p5; - struct { - float log2e; - float minus_ln2; - float c5; - float c4; - float c3; - float c2; - float c1; - float c0; - } avx512_rr1_p5; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float log2e[2]; - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) float minus_ln2_hi[2]; - XNN_ALIGN(8) float minus_ln2_lo[2]; - XNN_ALIGN(8) float c5[2]; - XNN_ALIGN(8) float c4[2]; - XNN_ALIGN(8) float c3[2]; - XNN_ALIGN(8) float c2[2]; - XNN_ALIGN(8) float c1[2]; - XNN_ALIGN(8) float denorm_cutoff[2]; - } wasmsimd_rr2_p5; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f16_lrelu_params { -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint16_t slope; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(32) float slope[8]; - } avx; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f32_lrelu_params { - struct { - float slope; - } scalar; -#if XNN_ARCH_X86 || 
XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float slope[4]; - } sse; - struct { - XNN_ALIGN(32) float slope[8]; - int32_t mask_table[14]; - } avx; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float slope[2]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f16_sigmoid_params { - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint16_t magic_bias; - uint16_t minus_log2e; - uint16_t ln2_hi; - uint16_t ln2_lo; - uint16_t c2; - uint16_t c1; - uint16_t denorm_cutoff; - } neonfp16arith_rr2_p2; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(32) float sign_mask[8]; - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) float minus_ln2[8]; - XNN_ALIGN(32) float c2[8]; - XNN_ALIGN(32) float c1[8]; - XNN_ALIGN(32) float one[8]; - XNN_ALIGN(32) float denorm_cutoff[8]; - } avx2_rr1_p2; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f32_sigmoid_params { - struct { - float magic_bias; - float minus_log2e; - float ln2_hi; - float ln2_lo; - float c1; - float one; - float denorm_cutoff; - } scalar_rr2_lut2048_p1; - struct { - float magic_bias; - float minus_log2e; - float ln2_hi; - float ln2_lo; - float c2; - float one; - float denorm_cutoff; - } scalar_rr2_lut64_p2; - struct { - float magic_bias; - float minus_log2e; - float ln2_hi; - float ln2_lo; - float c5; - float c4; - float c3; - float c2; - float c1; - float one; - float denorm_cutoff; - } scalar_rr2_p5; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - float magic_bias; - float minus_log2e; - float ln2_hi; - float ln2_lo; - float c1; - float denorm_cutoff; - } neon_rr2_lut2048_p1; - struct { - float magic_bias; - float minus_log2e; - float ln2_hi; - float ln2_lo; - float c2; - float denorm_cutoff; - } neon_rr2_lut64_p2; - struct { - float magic_bias; - 
float minus_log2e; - float ln2_hi; - float ln2_lo; - float c5; - float c4; - float c3; - float c2; - float c1; - float denorm_cutoff; - } neon_rr2_p5; - struct { - float magic_bias; - float minus_log2e; - float ln2; - float c1; - float denorm_cutoff; - } neonfma_rr1_lut2048_p1; - struct { - float magic_bias; - float minus_log2e; - float ln2; - float c2; - float denorm_cutoff; - } neonfma_rr1_lut64_p2; - struct { - float magic_bias; - float minus_log2e; - float ln2; - float c5; - float c4; - float c3; - float c2; - float c1; - float denorm_cutoff; - } neonfma_rr1_p5; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float sign_mask[4]; - XNN_ALIGN(16) float magic_bias[4]; - XNN_ALIGN(16) float log2e[4]; - XNN_ALIGN(16) uint32_t index_mask[4]; - XNN_ALIGN(16) float minus_ln2_hi[4]; - XNN_ALIGN(16) float minus_ln2_lo[4]; - XNN_ALIGN(16) float c2[4]; - XNN_ALIGN(16) float one[4]; - XNN_ALIGN(16) float denorm_cutoff[4]; - } sse2_rr2_lut64_p2; - struct { - XNN_ALIGN(16) float sign_mask[4]; - XNN_ALIGN(16) float magic_bias[4]; - XNN_ALIGN(16) float log2e[4]; - XNN_ALIGN(16) float minus_ln2_hi[4]; - XNN_ALIGN(16) float minus_ln2_lo[4]; - XNN_ALIGN(16) float c5[4]; - XNN_ALIGN(16) float c4[4]; - XNN_ALIGN(16) float c3[4]; - XNN_ALIGN(16) float c2[4]; - XNN_ALIGN(16) float c1[4]; - XNN_ALIGN(16) float one[4]; - XNN_ALIGN(16) float denorm_cutoff[4]; - } sse2_rr2_p5; - struct { - XNN_ALIGN(32) float sign_mask[8]; - XNN_ALIGN(32) float magic_bias[8]; - XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) float minus_ln2_hi[8]; - XNN_ALIGN(32) float minus_ln2_lo[8]; - XNN_ALIGN(32) float c5[8]; - XNN_ALIGN(32) float c4[8]; - XNN_ALIGN(32) float c3[8]; - XNN_ALIGN(32) float c2[8]; - XNN_ALIGN(32) float c1[8]; - XNN_ALIGN(32) float one[8]; - XNN_ALIGN(32) float two[8]; - XNN_ALIGN(32) float denorm_cutoff[8]; - int32_t mask_table[14]; - } avx_rr2_p5; - struct { - XNN_ALIGN(32) float sign_mask[8]; - XNN_ALIGN(32) float magic_bias[8]; 
- XNN_ALIGN(32) float log2e[8]; - XNN_ALIGN(32) float minus_ln2[8]; - XNN_ALIGN(32) float c5[8]; - XNN_ALIGN(32) float c4[8]; - XNN_ALIGN(32) float c3[8]; - XNN_ALIGN(32) float c2[8]; - XNN_ALIGN(32) float c1[8]; - XNN_ALIGN(32) float one[8]; - XNN_ALIGN(32) float denorm_cutoff[8]; - int32_t mask_table[14]; - } avx2_rr1_p5; - struct { - uint32_t sign_mask; - float magic_bias; - float log2e; - float minus_ln2; - float c3; - float c2; - float one; - XNN_ALIGN(64) float table[16]; - } avx512_rr1_lut16_p3; - struct { - uint32_t sign_mask; - float magic_bias; - float log2e; - float minus_ln2_hi; - float minus_ln2_lo; - float c2; - float c1; - float one; - XNN_ALIGN(64) float table_lo[16]; - XNN_ALIGN(64) float table_hi[16]; - } avx512_rr2_lut32_p2; - struct { - uint32_t sign_mask; - float log2e; - float minus_ln2; - float c5; - float c4; - float c3; - float c2; - float c1; - float one; - } avx512_rr1_p5; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) float minus_log2e[2]; - XNN_ALIGN(8) uint32_t index_mask[2]; - XNN_ALIGN(8) float ln2_hi[2]; - XNN_ALIGN(8) float ln2_lo[2]; - XNN_ALIGN(8) float c2[2]; - XNN_ALIGN(8) float one[2]; - XNN_ALIGN(8) float denorm_cutoff[2]; - } wasmsimd_rr2_lut64_p2; - struct { - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) float minus_log2e[2]; - XNN_ALIGN(8) float ln2_hi[2]; - XNN_ALIGN(8) float ln2_lo[2]; - XNN_ALIGN(8) float c5[2]; - XNN_ALIGN(8) float c4[2]; - XNN_ALIGN(8) float c3[2]; - XNN_ALIGN(8) float c2[2]; - XNN_ALIGN(8) float c1[2]; - XNN_ALIGN(8) float one[2]; - XNN_ALIGN(8) float denorm_cutoff[2]; - } wasmsimd_rr2_p5; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f16_sqrt_params { - char _; // Dummy member variable to comply with the C standard -}; - -union xnn_f32_sqrt_params { - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { 
- int32_t mask_table[14]; - } avx; - struct { - XNN_ALIGN(32) float half[8]; - int32_t mask_table[14]; - } fma; - struct { - float half; - } avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f32_chw_params { - struct { - XNN_ALIGN(16) int32_t mask_even[4]; // used by stride 2 kernels - XNN_ALIGN(16) int32_t mask_odd[4]; // used by stride 2 kernels - XNN_ALIGN(16) int32_t mask[4]; // used by stride 1 kernels - float min; - float max; - } scalar; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - float min; - float max; - XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels - XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels - XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float min[4]; - XNN_ALIGN(16) float max[4]; - XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels - XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels - XNN_ALIGN(16) uint32_t mask[4]; // used by stride 1 kernels - } sse; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f16_chw_params { - char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint16_t min; - uint16_t max; - XNN_ALIGN(8) uint16_t mask_even[4]; // used by stride 2 kernels - XNN_ALIGN(8) uint16_t mask_odd[4]; // used by stride 2 kernels - XNN_ALIGN(8) uint16_t mask[4]; // used by stride 1 kernels - XNN_ALIGN(16) uint16_t maskx8[8]; // used by stride 1 x8 kernels - } neonfp16arith; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -}; - -union xnn_s8_minmax_params { - struct { - int32_t min; - int32_t max; - } scalar; -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) uint8_t bias[16]; - XNN_ALIGN(16) uint8_t min_with_bias[16]; - XNN_ALIGN(16) uint8_t max_with_bias[16]; - } sse2; - struct { - XNN_ALIGN(16) int8_t min[16]; - XNN_ALIGN(16) int8_t max[16]; - } sse4; -#endif 
// XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - int8_t min; - int8_t max; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int8_t min[8]; - XNN_ALIGN(8) int8_t max[8]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_u8_minmax_params { - struct { - uint32_t min; - uint32_t max; - } scalar; -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) uint8_t min[16]; - XNN_ALIGN(16) uint8_t max[16]; - } sse2; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint8_t min; - uint8_t max; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) uint8_t min[8]; - XNN_ALIGN(8) uint8_t max[8]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f32_scaleminmax_params { - struct { - float scale; - float min; - float max; - } scalar; -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) float min[4]; - XNN_ALIGN(16) float max[4]; - } sse; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f32_gavgpool_params { - struct { - XNN_ALIGN(16) int32_t mask[4]; - float multiplier; - float output_min; - float output_max; - } scalar; -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float multiplier[4]; - XNN_ALIGN(16) float output_min[4]; - XNN_ALIGN(16) float output_max[4]; - XNN_ALIGN(16) uint32_t mask[4]; - } sse; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - XNN_ALIGN(16) uint32_t mask[4]; - float multiplier; - float output_min; - float output_max; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 */ -}; - -union xnn_f16_gavgpool_params { - // Empty; serves to differentiate pointer types for micro-kernels without fused activation. 
- char _; // Dummy member variable to comply with the C standard -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - XNN_ALIGN(16) uint16_t mask[8]; - uint16_t multiplier; - uint16_t output_min; - uint16_t output_max; - } neonfp16arith; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 */ -}; - -union xnn_f16_hswish_params { -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint16_t sixth; - uint16_t three; - uint16_t six; - uint16_t pad; // pad to 8 bytes for neonfp16arith assembly. - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 */ -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(32) float sixth[8]; - XNN_ALIGN(32) float three[8]; - XNN_ALIGN(16) uint16_t six[8]; - } avx; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_f32_hswish_params { - struct { - float sixth; - float three; - float six; - } scalar; -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float sixth[4]; - XNN_ALIGN(16) float half[4]; - XNN_ALIGN(16) float one[4]; - } sse; - struct { - XNN_ALIGN(32) float sixth[8]; - XNN_ALIGN(32) float half[8]; - XNN_ALIGN(32) float one[8]; - int32_t mask_table[14]; - } avx; - struct { - float sixth; - float half; - float one; - } avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float sixth[2]; - XNN_ALIGN(8) float three[2]; - XNN_ALIGN(8) float six[2]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qu8_conv_minmax_params { - struct { - int32_t kernel_zero_point; - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - } fp32_scalar_fmagic; - struct { - int32_t kernel_zero_point; - float scale; - float magic_bias; - int32_t magic_min; - int32_t magic_max; - int32_t magic_bias_less_zero_point; - } fp32_scalar_imagic; - struct { - int32_t kernel_zero_point; - float scale; - float output_min_less_zero_point; - float 
output_max_less_zero_point; - int32_t output_zero_point; - } fp32_scalar_lrintf; -#if XNN_ARCH_ARM - struct { - float scale; - float magic_bias; - uint32_t minus_kernel_zero_point; - int32_t magic_bias_less_zero_point; - uint32_t output_min; - uint32_t output_max; - } fp32_armv6simd; -#endif // XNN_ARCH_ARM -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint8_t kernel_zero_point[4]; - float scale; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - uint8_t output_min; - uint8_t output_max; - } fp32_neon; - struct { - uint8_t kernel_zero_point[4]; - float scale; - int16_t output_zero_point; - uint8_t output_min; - uint8_t output_max; - } fp32_neonv8; - struct { - uint8_t kernel_zero_point[4]; - int32_t right_pre_shift; - int32_t multiplier; - int32_t right_post_shift; - int16_t output_zero_point; - uint8_t output_min; - uint8_t output_max; - } rndnu_neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) int16_t kernel_zero_point[8]; - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) uint8_t output_min[16]; - } fp32_sse2; - struct { - XNN_ALIGN(32) int16_t kernel_zero_point[16]; - XNN_ALIGN(32) float scale[8]; - XNN_ALIGN(32) float output_max_less_zero_point[8]; - XNN_ALIGN(32) int16_t output_zero_point[16]; - XNN_ALIGN(32) uint8_t output_min[32]; - } fp32_avx2; - struct { - XNN_ALIGN(64) int16_t kernel_zero_point[32]; - XNN_ALIGN(64) float scale[16]; - XNN_ALIGN(64) float output_max_less_zero_point[16]; - XNN_ALIGN(64) int16_t output_zero_point[32]; - XNN_ALIGN(64) uint8_t output_min[64]; - } fp32_avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int16_t kernel_zero_point[4]; - XNN_ALIGN(8) float scale[2]; - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) int32_t magic_min[2]; - XNN_ALIGN(8) int32_t 
magic_bias_less_output_zero_point[2]; - XNN_ALIGN(8) int8_t output_max[8]; - } fp32_wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qs8_minmax_params { - struct { - float magic_bias; - int32_t magic_min; - int32_t magic_max; - int32_t magic_bias_less_zero_point; - } scalar_imagic; - struct { - float output_min_less_zero_point; - float output_max_less_zero_point; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - } scalar_fmagic; - struct { - float output_min_less_zero_point; - float output_max_less_zero_point; - int32_t output_zero_point; - } scalar_lrintf; -#if XNN_ARCH_ARM - struct { - float magic_bias; - int32_t magic_bias_less_zero_point; - uint32_t output_min; - uint32_t output_max; - } armv6simd; -#endif // XNN_ARCH_ARM -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - float magic_bias; - int32_t magic_bias_less_output_zero_point; - int8_t output_min; - int8_t output_max; - } neon; - struct { - int16_t output_zero_point; - uint8_t output_min; - uint8_t output_max; - } neonv8; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int16_t output_min[8]; - } sse2; - struct { - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int8_t output_min[16]; - } sse4; - struct { - XNN_ALIGN(32) float output_max_less_zero_point[8]; - XNN_ALIGN(32) int16_t output_zero_point[16]; - XNN_ALIGN(32) int8_t output_min[32]; - } avx2; - struct { - XNN_ALIGN(64) float output_max_less_zero_point[16]; - XNN_ALIGN(64) int16_t output_zero_point[32]; - XNN_ALIGN(64) int8_t output_min[64]; - } avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) int32_t magic_min[2]; - XNN_ALIGN(8) int32_t 
magic_bias_less_output_zero_point[2]; - XNN_ALIGN(8) int8_t output_max[8]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qs8_conv_minmax_params { - struct { - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - } fp32_scalar_fmagic; - struct { - float scale; - float magic_bias; - int32_t magic_min; - int32_t magic_max; - int32_t magic_bias_less_zero_point; - } fp32_scalar_imagic; - struct { - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - int32_t output_zero_point; - } fp32_scalar_lrintf; -#if XNN_ARCH_ARM - struct { - float scale; - float magic_bias; - int32_t magic_bias_less_zero_point; - uint32_t output_min; - uint32_t output_max; - } fp32_armv6simd; -#endif // XNN_ARCH_ARM -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - float scale; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - int8_t output_min; - int8_t output_max; - } fp32_neon; - struct { - float scale; - int16_t output_zero_point; - int8_t output_min; - int8_t output_max; - } fp32_neonv8; - struct { - int32_t right_pre_shift; - int32_t multiplier; - int32_t right_post_shift; - int16_t output_zero_point; - int8_t output_min; - int8_t output_max; - } rndnu_neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int16_t output_min[8]; - } fp32_sse2; - struct { - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int8_t output_min[16]; - } fp32_sse4; - struct { - XNN_ALIGN(32) float scale[8]; - XNN_ALIGN(32) float output_max_less_zero_point[8]; - XNN_ALIGN(32) int16_t output_zero_point[16]; - XNN_ALIGN(32) int8_t output_min[32]; - } 
fp32_avx2; - struct { - XNN_ALIGN(64) float scale[16]; - XNN_ALIGN(64) float output_max_less_zero_point[16]; - XNN_ALIGN(64) int16_t output_zero_point[32]; - XNN_ALIGN(64) int8_t output_min[64]; - } fp32_avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float scale[2]; - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) int32_t magic_min[2]; - XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2]; - XNN_ALIGN(8) int8_t output_max[8]; - } fp32_wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qu8_addsub_minmax_params { - struct { - int32_t bias; - int32_t a_multiplier; - int32_t b_multiplier; - int32_t rounding; - uint32_t shift; - int32_t output_min_less_zero_point; - int32_t output_max_less_zero_point; - int32_t output_zero_point; - } scalar; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint8_t a_zero_point; - uint8_t b_zero_point; - int16_t output_zero_point; - int32_t a_multiplier; - int32_t b_multiplier; - int32_t right_shift; - uint8_t output_min; - uint8_t output_max; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) int32_t bias[4]; - XNN_ALIGN(16) uint16_t a_multiplier_lo[8]; - XNN_ALIGN(16) uint16_t a_multiplier_hi[8]; - XNN_ALIGN(16) uint16_t b_multiplier_lo[8]; - XNN_ALIGN(16) uint16_t b_multiplier_hi[8]; - uint32_t shift; - uint32_t b_multiplier; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) uint8_t output_min[16]; - XNN_ALIGN(16) uint8_t output_max[16]; - } sse2; - struct { - XNN_ALIGN(16) int32_t bias[4]; - XNN_ALIGN(16) int32_t a_multiplier[4]; - XNN_ALIGN(16) int32_t b_multiplier[4]; - XNN_ALIGN(16) uint64_t shift[2]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) uint8_t output_min[16]; - XNN_ALIGN(16) uint8_t output_max[16]; - } sse4; - struct { - XNN_ALIGN(32) int32_t bias[8]; - XNN_ALIGN(32) int32_t a_multiplier[8]; - XNN_ALIGN(32) 
int32_t b_multiplier[8]; - XNN_ALIGN(32) uint64_t shift[4]; - XNN_ALIGN(32) int16_t output_zero_point[16]; - XNN_ALIGN(16) uint8_t output_min[16]; - XNN_ALIGN(16) uint8_t output_max[16]; - } avx2; - struct { - XNN_ALIGN(64) int32_t bias[16]; - XNN_ALIGN(64) int32_t a_multiplier[16]; - XNN_ALIGN(64) int32_t b_multiplier[16]; - XNN_ALIGN(64) uint64_t shift[8]; - XNN_ALIGN(64) int16_t output_zero_point[32]; - XNN_ALIGN(32) uint8_t output_min[32]; - XNN_ALIGN(32) uint8_t output_max[32]; - } avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int32_t bias[2]; - XNN_ALIGN(8) int32_t a_multiplier[2]; - XNN_ALIGN(8) int32_t b_multiplier[2]; - uint32_t shift; - XNN_ALIGN(8) int16_t output_zero_point[4]; - XNN_ALIGN(8) uint8_t output_min[8]; - XNN_ALIGN(8) uint8_t output_max[8]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qs8_addsub_minmax_params { - struct { - int32_t bias; - int32_t a_multiplier; - int32_t b_multiplier; - uint32_t shift; - int32_t output_min_less_zero_point; - int32_t output_max_less_zero_point; - int32_t output_zero_point; - } scalar; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - int8_t a_zero_point; - int8_t b_zero_point; - int16_t output_zero_point; - int32_t a_multiplier; - int32_t b_multiplier; - int32_t right_shift; - int8_t output_min; - int8_t output_max; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) int32_t bias[4]; - XNN_ALIGN(16) uint16_t a_multiplier_lo[8]; - XNN_ALIGN(16) uint16_t a_multiplier_hi[8]; - XNN_ALIGN(16) uint16_t b_multiplier_lo[8]; - XNN_ALIGN(16) uint16_t b_multiplier_hi[8]; - uint32_t shift; - uint32_t b_multiplier; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int16_t output_min[8]; - XNN_ALIGN(16) int16_t output_max[8]; - } sse2; - struct { - XNN_ALIGN(16) int32_t bias[4]; - XNN_ALIGN(16) uint16_t a_multiplier_lo[8]; - 
XNN_ALIGN(16) uint16_t a_multiplier_hi[8]; - XNN_ALIGN(16) uint16_t b_multiplier_lo[8]; - XNN_ALIGN(16) uint16_t b_multiplier_hi[8]; - uint32_t shift; - uint32_t b_multiplier; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int8_t output_min[16]; - XNN_ALIGN(16) int8_t output_max[16]; - } sse4_mul16; - struct { - XNN_ALIGN(16) int32_t bias[4]; - XNN_ALIGN(16) int32_t a_multiplier[4]; - XNN_ALIGN(16) int32_t b_multiplier[4]; - XNN_ALIGN(16) uint64_t shift[2]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int8_t output_min[16]; - XNN_ALIGN(16) int8_t output_max[16]; - } sse4_mul32; - struct { - XNN_ALIGN(32) int32_t bias[8]; - XNN_ALIGN(32) int32_t a_multiplier[8]; - XNN_ALIGN(32) int32_t b_multiplier[8]; - XNN_ALIGN(32) uint64_t shift[4]; - XNN_ALIGN(32) int16_t output_zero_point[16]; - XNN_ALIGN(16) int8_t output_min[16]; - XNN_ALIGN(16) int8_t output_max[16]; - } avx2; - struct { - XNN_ALIGN(64) int32_t bias[16]; - XNN_ALIGN(64) int32_t a_multiplier[16]; - XNN_ALIGN(64) int32_t b_multiplier[16]; - XNN_ALIGN(64) uint64_t shift[8]; - XNN_ALIGN(64) int16_t output_zero_point[32]; - XNN_ALIGN(32) int8_t output_min[32]; - XNN_ALIGN(32) int8_t output_max[32]; - } avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int32_t bias[2]; - XNN_ALIGN(8) int32_t a_multiplier[2]; - XNN_ALIGN(8) int32_t b_multiplier[2]; - uint32_t shift; - XNN_ALIGN(8) int16_t output_zero_point[4]; - XNN_ALIGN(8) int8_t output_min[8]; - XNN_ALIGN(8) int8_t output_max[8]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qu8_mul_minmax_params { - struct { - int32_t a_zero_point; - int32_t b_zero_point; - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - } fp32_scalar; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint8_t a_zero_point[2]; - uint8_t 
b_zero_point[2]; - float scale; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - uint8_t output_min; - uint8_t output_max; - } fp32_neon; - struct { - uint8_t a_zero_point[2]; - uint8_t b_zero_point[2]; - float scale; - int16_t output_zero_point; - uint8_t output_min; - uint8_t output_max; - } fp32_neonv8; - struct { - uint8_t a_zero_point[2]; - uint8_t b_zero_point[2]; - int32_t left_pre_shift; - int32_t multiplier; - int32_t left_post_shift; - int16_t output_zero_point; - uint8_t output_min; - uint8_t output_max; - } rndnu_neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) int16_t a_zero_point[8]; - XNN_ALIGN(16) int16_t b_zero_point[8]; - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) uint8_t output_min[16]; - XNN_ALIGN(16) uint8_t output_max[16]; - } fp32_sse2; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int16_t a_zero_point[4]; - XNN_ALIGN(8) int16_t b_zero_point[4]; - XNN_ALIGN(8) float scale[2]; - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) int32_t magic_min[2]; - XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2]; - XNN_ALIGN(8) uint8_t output_max[8]; - } fp32_wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qs8_mul_minmax_params { - struct { - int32_t a_zero_point; - int32_t b_zero_point; - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - } fp32_scalar; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - int8_t a_zero_point[2]; - int8_t b_zero_point[2]; - float scale; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - int8_t output_min; - int8_t output_max; - } fp32_neon; - struct { - int8_t a_zero_point[2]; - int8_t b_zero_point[2]; - float scale; - int16_t output_zero_point; - int8_t output_min; - 
int8_t output_max; - } fp32_neonv8; - struct { - int8_t a_zero_point[2]; - int8_t b_zero_point[2]; - int32_t left_pre_shift; - int32_t multiplier; - int32_t left_post_shift; - int16_t output_zero_point; - int8_t output_min; - int8_t output_max; - } rndnu_neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) int16_t a_zero_point[8]; - XNN_ALIGN(16) int16_t b_zero_point[8]; - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int16_t output_min[8]; - XNN_ALIGN(16) int16_t output_max[8]; - } fp32_sse2; - struct { - XNN_ALIGN(16) int16_t a_zero_point[8]; - XNN_ALIGN(16) int16_t b_zero_point[8]; - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int8_t output_min[16]; - XNN_ALIGN(16) int8_t output_max[16]; - } fp32_sse4; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int16_t a_zero_point[4]; - XNN_ALIGN(8) int16_t b_zero_point[4]; - XNN_ALIGN(8) float scale[2]; - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) int32_t magic_min[2]; - XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2]; - XNN_ALIGN(8) int8_t output_max[8]; - } fp32_wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qs8_lrelu_params { - struct { - int32_t input_zero_point; - int32_t positive_multiplier; - int32_t negative_multiplier; - int32_t bias; - } scalar_select; - struct { - int32_t input_zero_point; - int32_t multiplier_diff; - int32_t multiplier_base; - int32_t bias; - } scalar_andxor; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint32_t input_zero_point; - uint32_t positive_multiplier; - uint32_t negative_multiplier; - int32_t bias; - } armv6simd; - struct { - int16_t input_zero_point; - int16_t positive_multiplier; - int16_t negative_multiplier; - int16_t output_zero_point; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if 
XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) int16_t input_zero_point[8]; - XNN_ALIGN(16) int16_t multiplier_diff[8]; - XNN_ALIGN(16) int16_t multiplier_base[8]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - } sse2; - struct { - XNN_ALIGN(16) int16_t input_zero_point[8]; - XNN_ALIGN(16) int16_t positive_multiplier[8]; - XNN_ALIGN(16) int16_t negative_multiplier[8]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - } avx; - struct { - XNN_ALIGN(32) int16_t input_zero_point[16]; - XNN_ALIGN(32) int16_t positive_multiplier[16]; - XNN_ALIGN(32) int16_t negative_multiplier[16]; - XNN_ALIGN(32) int16_t output_zero_point[16]; - } avx2; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int16_t input_zero_point[4]; - XNN_ALIGN(8) int16_t positive_multiplier[4]; - XNN_ALIGN(8) int16_t negative_multiplier[4]; - XNN_ALIGN(8) int16_t output_zero_point[4]; - } wasmsimd_arm; - struct { - XNN_ALIGN(8) int16_t input_zero_point[4]; - XNN_ALIGN(8) int16_t multiplier_diff[4]; - XNN_ALIGN(8) int16_t multiplier_base[4]; - XNN_ALIGN(8) int16_t output_zero_point[4]; - } wasmsimd_x86; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qu8_lrelu_params { - struct { - int32_t input_zero_point; - int32_t positive_multiplier; - int32_t negative_multiplier; - int32_t bias; - } scalar_select; - struct { - int32_t input_zero_point; - int32_t multiplier_base; - int32_t multiplier_diff; - int32_t bias; - } scalar_andxor; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint32_t input_zero_point; - uint32_t positive_multiplier; - uint32_t negative_multiplier; - int32_t bias; - } armv6simd; - struct { - uint16_t input_zero_point; - int16_t positive_multiplier; - int16_t negative_multiplier; - int16_t output_zero_point; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) int16_t input_zero_point[8]; - XNN_ALIGN(16) int16_t 
multiplier_diff[8]; - XNN_ALIGN(16) int16_t multiplier_base[8]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - } sse2; - struct { - XNN_ALIGN(16) int16_t input_zero_point[8]; - XNN_ALIGN(16) int16_t positive_multiplier[8]; - XNN_ALIGN(16) int16_t negative_multiplier[8]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - } avx; - struct { - XNN_ALIGN(32) int16_t input_zero_point[16]; - XNN_ALIGN(32) int16_t positive_multiplier[16]; - XNN_ALIGN(32) int16_t negative_multiplier[16]; - XNN_ALIGN(32) int16_t output_zero_point[16]; - } avx2; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int16_t input_zero_point[4]; - XNN_ALIGN(8) int16_t positive_multiplier[4]; - XNN_ALIGN(8) int16_t negative_multiplier[4]; - XNN_ALIGN(8) int16_t output_zero_point[4]; - } wasmsimd_arm; - struct { - XNN_ALIGN(8) int16_t input_zero_point[4]; - XNN_ALIGN(8) int16_t multiplier_diff[4]; - XNN_ALIGN(8) int16_t multiplier_base[4]; - XNN_ALIGN(8) int16_t output_zero_point[4]; - } wasmsimd_x86; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qu8_avgpool_minmax_params { - struct { - int32_t init_bias; - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - } fp32_scalar_fmagic; - struct { - int32_t init_bias; - float scale; - float magic_bias; - int32_t magic_min; - int32_t magic_max; - int32_t magic_bias_less_zero_point; - } fp32_scalar_imagic; - struct { - int32_t init_bias; - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - int32_t output_zero_point; - } fp32_scalar_lrintf; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - int32_t init_bias; - float scale; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - uint8_t output_min; - uint8_t output_max; - } fp32_neon; - struct { - int32_t init_bias; - float scale; - int16_t output_zero_point; - uint8_t 
output_min; - uint8_t output_max; - } fp32_neonv8; - struct { - int32_t init_bias; - int32_t left_pre_shift; - int32_t multiplier; - int32_t left_post_shift; - int16_t output_zero_point; - uint8_t output_min; - uint8_t output_max; - } rndnu_neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) int32_t init_bias[4]; - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) uint8_t output_min[16]; - } fp32_sse2; - struct { - XNN_ALIGN(16) int32_t init_bias[4]; - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) uint8_t output_min[16]; - } fp32_sse4; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int32_t init_bias[2]; - XNN_ALIGN(8) float scale[2]; - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) int32_t magic_min[2]; - XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2]; - XNN_ALIGN(8) uint8_t output_max[8]; - } fp32_wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - // Legacy parameters used by QU8 AVGPOOL microkernels - struct { - int32_t bias; - int32_t multiplier; - int64_t rounding; - uint32_t right_shift; - int32_t output_min_less_zero_point; - int32_t output_max_less_zero_point; - int32_t output_zero_point; - } scalar; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - int32_t bias; - int32_t multiplier; - int64_t left_shift; - int16_t output_zero_point; - uint8_t output_min; - uint8_t output_max; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) int32_t bias[4]; - XNN_ALIGN(16) uint32_t multiplier[4]; - XNN_ALIGN(16) uint64_t rounding[2]; - XNN_ALIGN(16) uint64_t right_shift[2]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) uint8_t 
output_min[16]; - XNN_ALIGN(16) uint8_t output_max[16]; - } sse2; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -}; - -union xnn_qs8_avgpool_minmax_params { - struct { - int32_t init_bias; - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - } fp32_scalar_fmagic; - struct { - int32_t init_bias; - float scale; - float magic_bias; - int32_t magic_min; - int32_t magic_max; - int32_t magic_bias_less_zero_point; - } fp32_scalar_imagic; - struct { - int32_t init_bias; - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - int32_t output_zero_point; - } fp32_scalar_lrintf; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - int32_t init_bias; - float scale; - float magic_bias; - int32_t magic_bias_less_output_zero_point; - int8_t output_min; - int8_t output_max; - } fp32_neon; - struct { - int32_t init_bias; - float scale; - int16_t output_zero_point; - int8_t output_min; - int8_t output_max; - } fp32_neonv8; - struct { - int32_t init_bias; - int32_t left_pre_shift; - int32_t multiplier; - int32_t left_post_shift; - int16_t output_zero_point; - int8_t output_min; - int8_t output_max; - } rndnu_neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) int32_t init_bias[4]; - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int16_t output_min[8]; - } fp32_sse2; - struct { - XNN_ALIGN(16) int32_t init_bias[4]; - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int8_t output_min[16]; - } fp32_sse4; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int32_t init_bias[2]; - XNN_ALIGN(8) float scale[2]; - XNN_ALIGN(8) float magic_bias[2]; - 
XNN_ALIGN(8) int32_t magic_min[2]; - XNN_ALIGN(8) int32_t magic_bias_less_output_zero_point[2]; - XNN_ALIGN(8) int8_t output_max[8]; - } fp32_wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f16_f32_cvt_params { - struct { - uint32_t sign_mask; - uint32_t exp_offset; - float exp_scale; - uint32_t magic_mask; - float magic_bias; - uint32_t denorm_cutoff; - } scalar; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - float exp_scale; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) uint16_t sign_mask[8]; - XNN_ALIGN(16) uint16_t exp_offset[8]; - XNN_ALIGN(16) float exp_scale[4]; - XNN_ALIGN(16) uint16_t magic_mask[8]; - XNN_ALIGN(16) float magic_bias[4]; - XNN_ALIGN(16) int16_t denorm_cutoff[8]; - } sse_int16; - struct { - XNN_ALIGN(16) uint32_t sign_mask[4]; - XNN_ALIGN(16) uint32_t exp_offset[4]; - XNN_ALIGN(16) float exp_scale[4]; - XNN_ALIGN(16) uint32_t magic_bias[4]; - XNN_ALIGN(16) int32_t denorm_cutoff[4]; - } sse_int32; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) uint16_t sign_mask[4]; - XNN_ALIGN(8) uint16_t exp_offset[4]; - XNN_ALIGN(8) float exp_scale[2]; - XNN_ALIGN(8) uint16_t magic_mask[4]; - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) int16_t denorm_cutoff[4]; - } wasmsimd_int16; - struct { - XNN_ALIGN(8) uint32_t sign_mask[2]; - XNN_ALIGN(8) uint32_t exp_offset[2]; - XNN_ALIGN(8) float exp_scale[2]; - XNN_ALIGN(8) uint32_t magic_bias[2]; - XNN_ALIGN(8) int32_t denorm_cutoff[2]; - } wasmsimd_int32; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f32_f16_cvt_params { - struct { - uint32_t nonsign_mask; - uint32_t exp_bias; - float scale_to_inf; - uint32_t expw_max; - float scale_to_zero; - uint32_t bias_min; - uint16_t exph_mask; - uint16_t manth_mask; - uint16_t nanh; - } scalar_bitcast; - struct { - float scale_to_inf; - uint32_t exp_bias; - float 
scale_to_zero; - uint32_t expw_max; - uint32_t bias_min; - uint16_t exph_mask; - uint16_t manth_mask; - uint16_t nanh; - } scalar_fabsf; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint32_t exp_bias; - float scale_to_inf; - uint32_t expw_max; - float scale_to_zero; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) uint32_t nonsign_mask[4]; - XNN_ALIGN(16) uint32_t exp_bias[4]; - XNN_ALIGN(16) float scale_to_inf[4]; - XNN_ALIGN(16) uint32_t expw_max[4]; - XNN_ALIGN(16) float scale_to_zero[4]; - XNN_ALIGN(16) int16_t bias_min[8]; - XNN_ALIGN(16) uint32_t manth_mask[4]; - XNN_ALIGN(16) uint32_t exph_mask[4]; - XNN_ALIGN(16) uint16_t nanh[8]; - } sse2; - struct { - int32_t mask_table[14]; - } f16c; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) uint32_t exp_bias[2]; - XNN_ALIGN(8) float scale_to_inf[2]; - XNN_ALIGN(8) uint32_t expw_max[2]; - XNN_ALIGN(8) float scale_to_zero[2]; - XNN_ALIGN(8) int16_t bias_min[4]; - XNN_ALIGN(8) uint32_t manth_mask[2]; - XNN_ALIGN(8) uint32_t exph_mask[2]; - XNN_ALIGN(8) uint16_t nanh[4]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f32_qs8_cvt_params { - struct { - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - float magic_bias; - int32_t magic_bias_less_zero_point; - } scalar_fmagic; - struct { - float scale; - float magic_bias; - int32_t magic_min; - int32_t magic_max; - int32_t magic_bias_less_zero_point; - } scalar_imagic; - struct { - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - int32_t output_zero_point; - } scalar_lrintf; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - float scale; - float magic_bias; - int32_t magic_bias_less_zero_point; - int8_t output_min; - int8_t output_max; - } neon; - struct { - float scale; - int16_t output_zero_point; - int8_t output_min; 
- int8_t output_max; - } neonv8; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int16_t output_min[8]; - } sse2; - struct { - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int8_t output_min[16]; - } sse4; - struct { - XNN_ALIGN(32) float scale[8]; - XNN_ALIGN(32) float output_max_less_zero_point[8]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) int8_t output_min[16]; - int32_t mask_table[14]; - } avx; - struct { - XNN_ALIGN(32) float scale[8]; - XNN_ALIGN(32) float output_max_less_zero_point[8]; - XNN_ALIGN(32) int16_t output_zero_point[16]; - XNN_ALIGN(32) uint32_t shuffle_mask[8]; - XNN_ALIGN(32) int8_t output_min[32]; - int32_t mask_table[14]; - } avx2; - struct { - XNN_ALIGN(64) float scale[16]; - XNN_ALIGN(64) float output_max_less_zero_point[16]; - XNN_ALIGN(64) int16_t output_zero_point[32]; - XNN_ALIGN(64) int8_t output_min[64]; - XNN_ALIGN(64) uint32_t shuffle512_mask[16]; - XNN_ALIGN(32) uint32_t shuffle256_mask[8]; - } avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float scale[2]; - XNN_ALIGN(8) int16_t output_zero_point[4]; - XNN_ALIGN(8) int8_t output_min[8]; - XNN_ALIGN(8) int8_t output_max[8]; - } wasmsimd_cvt; - struct { - XNN_ALIGN(8) float scale[2]; - XNN_ALIGN(8) float magic_bias[2]; - XNN_ALIGN(8) int32_t magic_min[2]; - XNN_ALIGN(8) int32_t magic_bias_less_zero_point[2]; - XNN_ALIGN(8) int8_t output_max[8]; - } wasmsimd_magic; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_f32_qu8_cvt_params { - struct { - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - float magic_bias; - int32_t 
magic_bias_less_zero_point; - } scalar_fmagic; - struct { - float scale; - float magic_bias; - int32_t magic_min; - int32_t magic_max; - int32_t magic_bias_less_zero_point; - } scalar_imagic; - struct { - float scale; - float output_min_less_zero_point; - float output_max_less_zero_point; - int32_t output_zero_point; - } scalar_lrintf; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - float scale; - float magic_bias; - int32_t magic_bias_less_zero_point; - uint8_t output_min; - uint8_t output_max; - } neon; - struct { - float scale; - int16_t output_zero_point; - uint8_t output_min; - uint8_t output_max; - } neonv8; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) float scale[4]; - XNN_ALIGN(16) float output_max_less_zero_point[4]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) uint8_t output_min[16]; - } sse2; - struct { - XNN_ALIGN(32) float scale[8]; - XNN_ALIGN(32) float output_max_less_zero_point[8]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - XNN_ALIGN(16) uint8_t output_min[16]; - int32_t mask_table[14]; - } avx; - struct { - XNN_ALIGN(32) float scale[8]; - XNN_ALIGN(32) float output_max_less_zero_point[8]; - XNN_ALIGN(32) int16_t output_zero_point[16]; - XNN_ALIGN(32) uint32_t shuffle_mask[8]; - XNN_ALIGN(32) uint8_t output_min[32]; - int32_t mask_table[14]; - } avx2; - struct { - XNN_ALIGN(64) float scale[16]; - XNN_ALIGN(64) float output_max_less_zero_point[16]; - XNN_ALIGN(64) int16_t output_zero_point[32]; - XNN_ALIGN(64) uint8_t output_min[64]; - XNN_ALIGN(64) uint32_t shuffle512_mask[16]; - XNN_ALIGN(32) uint32_t shuffle256_mask[8]; - } avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) float scale[2]; - XNN_ALIGN(8) int16_t output_zero_point[4]; - XNN_ALIGN(8) uint8_t output_min[8]; - XNN_ALIGN(8) uint8_t output_max[8]; - } wasmsimd_cvt; - struct { - XNN_ALIGN(8) float scale[2]; - XNN_ALIGN(8) float 
magic_bias[2]; - XNN_ALIGN(8) int32_t magic_min[2]; - XNN_ALIGN(8) int32_t magic_bias_less_zero_point[2]; - XNN_ALIGN(8) uint8_t output_max[8]; - } wasmsimd_magic; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qs8_cvt_params { - struct { - int32_t bias; - int32_t multiplier; - } scalar; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint32_t minus_input_zero_point; - int32_t multiplier; - int32_t bias; - } armv6simd; - struct { - int16_t input_zero_point; - int16_t multiplier; - int16_t output_zero_point; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) int16_t multiplier[8]; - XNN_ALIGN(16) int32_t bias[4]; - } sse2; - struct { - XNN_ALIGN(16) int16_t input_zero_point[8]; - XNN_ALIGN(16) int16_t multiplier[8]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - } ssse3; - struct { - XNN_ALIGN(32) int16_t input_zero_point[16]; - XNN_ALIGN(32) int16_t multiplier[16]; - XNN_ALIGN(32) int16_t output_zero_point[16]; - } avx2; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int16_t input_zero_point[4]; - XNN_ALIGN(8) int16_t multiplier[4]; - XNN_ALIGN(8) int16_t output_zero_point[4]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qs8_f32_cvt_params { - struct { - int32_t zero_point; - float scale; - } scalar; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - int16_t minus_zero_point[2]; - float scale; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) uint8_t sign_mask[16]; - XNN_ALIGN(16) uint16_t magic_exp[8]; - XNN_ALIGN(16) float magic_bias[4]; - XNN_ALIGN(16) float scale[4]; - } sse2; - struct { - XNN_ALIGN(16) int32_t minus_zero_point[4]; - XNN_ALIGN(16) float scale[4]; - } sse4; - struct { - XNN_ALIGN(32) int32_t minus_zero_point[8]; - XNN_ALIGN(32) float scale[8]; - } avx; - struct { - 
XNN_ALIGN(64) int32_t minus_zero_point[16]; - XNN_ALIGN(64) float scale[16]; - } avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int16_t minus_zero_point[4]; - XNN_ALIGN(8) float scale[2]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qu8_cvt_params { - struct { - int32_t bias; - int32_t multiplier; - } scalar; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - uint32_t minus_input_zero_point; - int32_t multiplier; - int32_t bias; - } armv6simd; - struct { - uint16_t input_zero_point; - int16_t multiplier; - int16_t output_zero_point; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) uint16_t multiplier[8]; - XNN_ALIGN(16) int32_t bias[4]; - } sse2; - struct { - XNN_ALIGN(16) uint16_t input_zero_point[8]; - XNN_ALIGN(16) int16_t multiplier[8]; - XNN_ALIGN(16) int16_t output_zero_point[8]; - } ssse3; - struct { - XNN_ALIGN(32) uint16_t input_zero_point[16]; - XNN_ALIGN(32) int16_t multiplier[16]; - XNN_ALIGN(32) int16_t output_zero_point[16]; - } avx2; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) uint16_t input_zero_point[4]; - XNN_ALIGN(8) int16_t multiplier[4]; - XNN_ALIGN(8) int16_t output_zero_point[4]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - -union xnn_qu8_f32_cvt_params { - struct { - int32_t zero_point; - float scale; - } scalar; -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - struct { - int16_t minus_zero_point[2]; - float scale; - } neon; -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - struct { - XNN_ALIGN(16) uint16_t magic_exp[8]; - XNN_ALIGN(16) float magic_bias[4]; - XNN_ALIGN(16) float scale[4]; - } sse2; - struct { - XNN_ALIGN(16) int32_t minus_zero_point[4]; - XNN_ALIGN(16) float scale[4]; - } sse4; - struct { - XNN_ALIGN(32) int32_t 
minus_zero_point[8]; - XNN_ALIGN(32) float scale[8]; - } avx; - struct { - XNN_ALIGN(64) int32_t minus_zero_point[16]; - XNN_ALIGN(64) float scale[16]; - } avx512; -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - struct { - XNN_ALIGN(8) int16_t minus_zero_point[4]; - XNN_ALIGN(8) float scale[2]; - } wasmsimd; -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD -}; - typedef void (*xnn_ppmm_ukernel_function)( size_t mr, size_t nc, @@ -4284,16 +1880,6 @@ typedef void (*xnn_init_qc8_scale_params_fn)( const float scale[XNN_MIN_ELEMENTS(1)], void* packed_w); -// Forward declare to avoid circular includes between this and allocator.h. -struct xnn_code_buffer; - -struct jit_gemm_params { - struct { - float min; - float max; - } f32_minmax; -}; - typedef enum xnn_status (*xnn_jit_gemm_code_generator_function)( struct xnn_code_buffer *code, size_t max_mr, size_t nc, size_t kc, const void *params); typedef enum xnn_status (*xnn_jit_igemm_code_generator_function)( diff --git a/src/xnnpack/pavgpool.h b/src/xnnpack/pavgpool.h index 828cf35bd..6c4d6ab75 100644 --- a/src/xnnpack/pavgpool.h +++ b/src/xnnpack/pavgpool.h @@ -8,8 +8,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/ppmm.h b/src/xnnpack/ppmm.h index cec14872e..8379edfb9 100644 --- a/src/xnnpack/ppmm.h +++ b/src/xnnpack/ppmm.h @@ -8,8 +8,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/prelu.h b/src/xnnpack/prelu.h index f3d999793..bb0d63706 100644 --- a/src/xnnpack/prelu.h +++ b/src/xnnpack/prelu.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/raddexpminusmax.h 
b/src/xnnpack/raddexpminusmax.h index 8f73d1939..ffc6407ed 100644 --- a/src/xnnpack/raddexpminusmax.h +++ b/src/xnnpack/raddexpminusmax.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/raddextexp.h b/src/xnnpack/raddextexp.h index 14381d1ab..f16e90be8 100644 --- a/src/xnnpack/raddextexp.h +++ b/src/xnnpack/raddextexp.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/raddstoreexpminusmax.h b/src/xnnpack/raddstoreexpminusmax.h index 62aecbad0..d1c487cb5 100644 --- a/src/xnnpack/raddstoreexpminusmax.h +++ b/src/xnnpack/raddstoreexpminusmax.h @@ -8,8 +8,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/requantization-stubs.h b/src/xnnpack/requantization-stubs.h index 5da9597f7..39c3597c3 100644 --- a/src/xnnpack/requantization-stubs.h +++ b/src/xnnpack/requantization-stubs.h @@ -11,8 +11,6 @@ #include <stdint.h> #include <stddef.h> -#include <xnnpack/params.h> - #ifdef __cplusplus extern "C" { #endif diff --git a/src/xnnpack/requantization.h b/src/xnnpack/requantization.h index a17a1b1b8..4fe12aedd 100644 --- a/src/xnnpack/requantization.h +++ b/src/xnnpack/requantization.h @@ -13,11 +13,9 @@ #include <assert.h> #include <math.h> -#include <fp16.h> - #include <xnnpack/common.h> #include <xnnpack/math.h> -#include <xnnpack/params.h> +#include <xnnpack/microparams.h> typedef int8_t (*xnn_qs8_requantize_fn)( diff --git a/src/xnnpack/rmaxabs.h b/src/xnnpack/rmaxabs.h index e69f53a07..400bc43b8 100644 --- a/src/xnnpack/rmaxabs.h +++ b/src/xnnpack/rmaxabs.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/spmm.h 
b/src/xnnpack/spmm.h index 7cccab23a..dfce62b4c 100644 --- a/src/xnnpack/spmm.h +++ b/src/xnnpack/spmm.h @@ -8,8 +8,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/transpose.h b/src/xnnpack/transpose.h index 78a7dcc68..3634f6c8d 100644 --- a/src/xnnpack/transpose.h +++ b/src/xnnpack/transpose.h @@ -7,8 +7,8 @@ #include <stddef.h> #include <stdint.h> + #include <xnnpack/common.h> -#include <xnnpack/params.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/unpool.h b/src/xnnpack/unpool.h index 875c54fdd..7aef9989e 100644 --- a/src/xnnpack/unpool.h +++ b/src/xnnpack/unpool.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/vaddsub.h b/src/xnnpack/vaddsub.h index c0c819507..c287522d8 100644 --- a/src/xnnpack/vaddsub.h +++ b/src/xnnpack/vaddsub.h @@ -11,8 +11,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/vbinary.h b/src/xnnpack/vbinary.h index 8dbd4e938..930344f9c 100644 --- a/src/xnnpack/vbinary.h +++ b/src/xnnpack/vbinary.h @@ -11,8 +11,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/vcvt.h b/src/xnnpack/vcvt.h index a6a0c500f..3ca9d53ac 100644 --- a/src/xnnpack/vcvt.h +++ b/src/xnnpack/vcvt.h @@ -8,8 +8,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/vlrelu.h b/src/xnnpack/vlrelu.h index 35cfe5031..4c83e8e3b 100644 --- a/src/xnnpack/vlrelu.h +++ b/src/xnnpack/vlrelu.h @@ -8,8 +8,8 
@@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/vlshift.h b/src/xnnpack/vlshift.h index 271e28663..23481e514 100644 --- a/src/xnnpack/vlshift.h +++ b/src/xnnpack/vlshift.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/vmul.h b/src/xnnpack/vmul.h index c01a2d69e..3441a8cfe 100644 --- a/src/xnnpack/vmul.h +++ b/src/xnnpack/vmul.h @@ -8,8 +8,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/vmulcaddc.h b/src/xnnpack/vmulcaddc.h index 05480f21d..0be7cf50d 100644 --- a/src/xnnpack/vmulcaddc.h +++ b/src/xnnpack/vmulcaddc.h @@ -8,8 +8,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include <xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/vscaleexpminusmax.h b/src/xnnpack/vscaleexpminusmax.h index 2b0a407ce..afb864b79 100644 --- a/src/xnnpack/vscaleexpminusmax.h +++ b/src/xnnpack/vscaleexpminusmax.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/vscaleextexp.h b/src/xnnpack/vscaleextexp.h index 6433ed69a..c84365e69 100644 --- a/src/xnnpack/vscaleextexp.h +++ b/src/xnnpack/vscaleextexp.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/vunary.h b/src/xnnpack/vunary.h index 9847f9ca7..aee3576d9 100644 --- a/src/xnnpack/vunary.h +++ b/src/xnnpack/vunary.h @@ -8,8 +8,8 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> +#include 
<xnnpack/microparams.h> #ifdef __cplusplus extern "C" { diff --git a/src/xnnpack/window.h b/src/xnnpack/window.h index 58fd0f3ad..dea724691 100644 --- a/src/xnnpack/window.h +++ b/src/xnnpack/window.h @@ -8,7 +8,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus diff --git a/src/xnnpack/zip.h b/src/xnnpack/zip.h index 52a2fffe5..e617e69c6 100644 --- a/src/xnnpack/zip.h +++ b/src/xnnpack/zip.h @@ -11,7 +11,6 @@ #include <stddef.h> #include <stdint.h> -#include <xnnpack/params.h> #include <xnnpack/common.h> #ifdef __cplusplus |