author     Marat Dukhan <maratek@google.com>               2022-07-25 11:12:42 -0700
committer  XNNPACK Team <xnnpack-github-robot@google.com>  2022-07-25 11:14:01 -0700
commit     2247560904f5366d6d370bb080cfc2dbe9f57598 (patch)
tree       447a97d6d3f25016ab6b6cd5c3f7a30a8a188795
parent     c7cb3c177fbcd277c29d0ead9eaf1390255591f0 (diff)
download   XNNPACK-2247560904f5366d6d370bb080cfc2dbe9f57598.tar.gz
Rename asr_s32/asr_s64 to math_asr_s32/math_asr_s64
PiperOrigin-RevId: 463131507
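
For reference: the renamed helpers are the arithmetic-right-shift wrappers declared in src/xnnpack/math.h (one of the files touched below). Their definitions are not part of this diff; the following is only a minimal sketch of what the renamed functions are assumed to look like, to make the mechanical substitution below easier to read:

#include <stdint.h>

// Assumed shape of the renamed helpers (the actual definitions live in src/xnnpack/math.h).
// Arithmetic right shift of a signed 32-bit value; valid for 0 <= shift < 32.
static inline int32_t math_asr_s32(int32_t x, uint32_t shift) {
  return x >> shift;  // relies on the compiler implementing >> as an arithmetic shift for signed types
}

// Arithmetic right shift of a signed 64-bit value; valid for 0 <= shift < 64.
static inline int64_t math_asr_s64(int64_t x, uint32_t shift) {
  return x >> shift;
}
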
-rw-r--r--  src/qs8-requantization/gemmlowp-scalar.c | 8
-rw-r--r--  src/qs8-requantization/rndna-scalar-signed64.c | 8
-rw-r--r--  src/qs8-requantization/rndnu-scalar.c | 8
-rw-r--r--  src/qs8-vadd/gen/minmax-scalar-x1.c | 2
-rw-r--r--  src/qs8-vadd/gen/minmax-scalar-x2.c | 6
-rw-r--r--  src/qs8-vadd/gen/minmax-scalar-x4.c | 10
-rw-r--r--  src/qs8-vadd/scalar.c.in | 8
-rw-r--r--  src/qs8-vaddc/gen/minmax-scalar-x1.c | 2
-rw-r--r--  src/qs8-vaddc/gen/minmax-scalar-x2.c | 6
-rw-r--r--  src/qs8-vaddc/gen/minmax-scalar-x4.c | 10
-rw-r--r--  src/qs8-vaddc/scalar.c.in | 8
-rw-r--r--  src/qs8-vcvt/armv6simd.c.in | 16
-rw-r--r--  src/qs8-vcvt/gen/vcvt-armv6simd-x4.c | 14
-rw-r--r--  src/qs8-vcvt/gen/vcvt-armv6simd-x8.c | 30
-rw-r--r--  src/qs8-vcvt/gen/vcvt-scalar-x1.c | 2
-rw-r--r--  src/qs8-vcvt/gen/vcvt-scalar-x2.c | 6
-rw-r--r--  src/qs8-vcvt/gen/vcvt-scalar-x4.c | 10
-rw-r--r--  src/qs8-vcvt/scalar.c.in | 8
-rw-r--r--  src/qs8-vlrelu/armv6simd.c.in | 16
-rw-r--r--  src/qs8-vlrelu/gen/vlrelu-armv6simd-x4.c | 14
-rw-r--r--  src/qs8-vlrelu/gen/vlrelu-armv6simd-x8.c | 30
-rw-r--r--  src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x1.c | 4
-rw-r--r--  src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x2.c | 12
-rw-r--r--  src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x4.c | 20
-rw-r--r--  src/qs8-vlrelu/gen/vlrelu-scalar-select-x1.c | 2
-rw-r--r--  src/qs8-vlrelu/gen/vlrelu-scalar-select-x2.c | 6
-rw-r--r--  src/qs8-vlrelu/gen/vlrelu-scalar-select-x4.c | 10
-rw-r--r--  src/qs8-vlrelu/scalar-andxor.c.in | 16
-rw-r--r--  src/qs8-vlrelu/scalar-select.c.in | 8
-rw-r--r--  src/qu8-avgpool/9p8x-minmax-scalar-c1.c | 2
-rw-r--r--  src/qu8-avgpool/9x-minmax-scalar-c1.c | 2
-rw-r--r--  src/qu8-requantization/gemmlowp-scalar.c | 8
-rw-r--r--  src/qu8-requantization/rndna-scalar-signed64.c | 8
-rw-r--r--  src/qu8-vadd/gen/minmax-scalar-x1.c | 2
-rw-r--r--  src/qu8-vadd/gen/minmax-scalar-x2.c | 6
-rw-r--r--  src/qu8-vadd/gen/minmax-scalar-x4.c | 10
-rw-r--r--  src/qu8-vaddc/gen/minmax-scalar-x1.c | 2
-rw-r--r--  src/qu8-vaddc/gen/minmax-scalar-x2.c | 6
-rw-r--r--  src/qu8-vaddc/gen/minmax-scalar-x4.c | 10
-rw-r--r--  src/qu8-vcvt/gen/vcvt-armv6simd-x4.c | 14
-rw-r--r--  src/qu8-vcvt/gen/vcvt-armv6simd-x8.c | 30
-rw-r--r--  src/qu8-vcvt/gen/vcvt-scalar-x1.c | 2
-rw-r--r--  src/qu8-vcvt/gen/vcvt-scalar-x2.c | 6
-rw-r--r--  src/qu8-vcvt/gen/vcvt-scalar-x4.c | 10
-rw-r--r--  src/qu8-vlrelu/gen/vlrelu-armv6simd-x4.c | 14
-rw-r--r--  src/qu8-vlrelu/gen/vlrelu-armv6simd-x8.c | 30
-rw-r--r--  src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x1.c | 4
-rw-r--r--  src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x2.c | 12
-rw-r--r--  src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x4.c | 20
-rw-r--r--  src/qu8-vlrelu/gen/vlrelu-scalar-select-x1.c | 2
-rw-r--r--  src/qu8-vlrelu/gen/vlrelu-scalar-select-x2.c | 6
-rw-r--r--  src/qu8-vlrelu/gen/vlrelu-scalar-select-x4.c | 10
-rw-r--r--  src/s16-window/gen/scalar-x1.c | 2
-rw-r--r--  src/s16-window/gen/scalar-x2.c | 6
-rw-r--r--  src/s16-window/gen/scalar-x3.c | 8
-rw-r--r--  src/s16-window/gen/scalar-x4.c | 10
-rw-r--r--  src/s16-window/scalar.c.in | 4
-rw-r--r--  src/s8-ibilinear/gen/scalar-c1.c | 2
-rw-r--r--  src/s8-ibilinear/gen/scalar-c2.c | 6
-rw-r--r--  src/s8-ibilinear/gen/scalar-c4.c | 10
-rw-r--r--  src/s8-ibilinear/scalar.c.in | 6
-rw-r--r--  src/u8-ibilinear/gen/scalar-c1.c | 2
-rw-r--r--  src/u8-ibilinear/gen/scalar-c2.c | 6
-rw-r--r--  src/u8-ibilinear/gen/scalar-c4.c | 10
-rw-r--r--  src/xnnpack/math.h | 4
-rw-r--r--  src/xnnpack/requantization.h | 8
-rw-r--r--  test/ibilinear-microkernel-tester.h | 2
-rw-r--r--  test/vcvt-microkernel-tester.h | 4
-rw-r--r--  test/vlrelu-microkernel-tester.h | 4
69 files changed, 305 insertions, 305 deletions
diff --git a/src/qs8-requantization/gemmlowp-scalar.c b/src/qs8-requantization/gemmlowp-scalar.c
index 119e81edb..3bd4f42f1 100644
--- a/src/qs8-requantization/gemmlowp-scalar.c
+++ b/src/qs8-requantization/gemmlowp-scalar.c
@@ -100,10 +100,10 @@ void xnn_qs8_requantize_gemmlowp__scalar(
const int32_t z_remainder = (z_q31product & remainder_mask) - (int32_t) (z_q31product < 0);
const int32_t w_remainder = (w_q31product & remainder_mask) - (int32_t) (w_q31product < 0);
- const int32_t x_scaled = asr_s32(x_q31product, shift) + (int32_t) (x_remainder > threshold);
- const int32_t y_scaled = asr_s32(y_q31product, shift) + (int32_t) (y_remainder > threshold);
- const int32_t z_scaled = asr_s32(z_q31product, shift) + (int32_t) (z_remainder > threshold);
- const int32_t w_scaled = asr_s32(w_q31product, shift) + (int32_t) (w_remainder > threshold);
+ const int32_t x_scaled = math_asr_s32(x_q31product, shift) + (int32_t) (x_remainder > threshold);
+ const int32_t y_scaled = math_asr_s32(y_q31product, shift) + (int32_t) (y_remainder > threshold);
+ const int32_t z_scaled = math_asr_s32(z_q31product, shift) + (int32_t) (z_remainder > threshold);
+ const int32_t w_scaled = math_asr_s32(w_q31product, shift) + (int32_t) (w_remainder > threshold);
// Clamp scaled value with zero point between (qmin - zero point) and (qmax - zero point).
const int32_t x_clamped = math_min_s32(math_max_s32(x_scaled, smin), smax);
diff --git a/src/qs8-requantization/rndna-scalar-signed64.c b/src/qs8-requantization/rndna-scalar-signed64.c
index d04dc795a..e3d3d29db 100644
--- a/src/qs8-requantization/rndna-scalar-signed64.c
+++ b/src/qs8-requantization/rndna-scalar-signed64.c
@@ -65,10 +65,10 @@ void xnn_qs8_requantize_rndna__scalar_signed64(
// Note that although rounding is precomputed, it is dependent on shift value, and on processors with 64-bit
// "right shift with rounding" instruction each line below can be represented by just one such instruction
// (e.g. VRSHL.S64 on ARM NEON, SRSHL in ARM64 Advanced SIMD).
- const int32_t x_scaled = (int32_t) asr_s64(x_adjusted_product + rounding, shift);
- const int32_t y_scaled = (int32_t) asr_s64(y_adjusted_product + rounding, shift);
- const int32_t z_scaled = (int32_t) asr_s64(z_adjusted_product + rounding, shift);
- const int32_t w_scaled = (int32_t) asr_s64(w_adjusted_product + rounding, shift);
+ const int32_t x_scaled = (int32_t) math_asr_s64(x_adjusted_product + rounding, shift);
+ const int32_t y_scaled = (int32_t) math_asr_s64(y_adjusted_product + rounding, shift);
+ const int32_t z_scaled = (int32_t) math_asr_s64(z_adjusted_product + rounding, shift);
+ const int32_t w_scaled = (int32_t) math_asr_s64(w_adjusted_product + rounding, shift);
// Clamp scaled value with zero point between (qmin - zero point) and (qmax - zero point).
const int32_t x_clamped = math_min_s32(math_max_s32(x_scaled, smin), smax);
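
For readers unfamiliar with the rounding trick referenced in the comment above, here is a small standalone illustration. The value of rounding is an assumption (it is precomputed elsewhere in the kernel and not shown in this excerpt); the usual choice for round-to-nearest is one half of the divisor:

// Hypothetical illustration of the rounding right shift, assuming
// rounding == (int64_t) 1 << (shift - 1) for shift > 0.
const uint32_t shift = 3;                            // divide by 2^3 = 8
const int64_t rounding = INT64_C(1) << (shift - 1);  // == 4
const int64_t x_adjusted_product = 21;               // 21 / 8 = 2.625
const int32_t x_scaled = (int32_t) math_asr_s64(x_adjusted_product + rounding, shift);
// x_scaled == 3: (21 + 4) >> 3 rounds 2.625 to the nearest integer,
// which is what VRSHL.S64 / SRSHL compute in a single instruction.
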
diff --git a/src/qs8-requantization/rndnu-scalar.c b/src/qs8-requantization/rndnu-scalar.c
index eafc7e768..84df119be 100644
--- a/src/qs8-requantization/rndnu-scalar.c
+++ b/src/qs8-requantization/rndnu-scalar.c
@@ -59,10 +59,10 @@ void xnn_qs8_requantize_rndnu__scalar(
// Note that although rounding is precomputed, it is dependent on shift value, and on processors with 64-bit
// "right shift with rounding" instruction each line below can be represented by just one such instruction
// (e.g. VRSHL.S64 on ARM NEON, SRSHL in ARM64 Advanced SIMD).
- const int32_t x_scaled = (int32_t) asr_s64(x_product + rounding, shift);
- const int32_t y_scaled = (int32_t) asr_s64(y_product + rounding, shift);
- const int32_t z_scaled = (int32_t) asr_s64(z_product + rounding, shift);
- const int32_t w_scaled = (int32_t) asr_s64(w_product + rounding, shift);
+ const int32_t x_scaled = (int32_t) math_asr_s64(x_product + rounding, shift);
+ const int32_t y_scaled = (int32_t) math_asr_s64(y_product + rounding, shift);
+ const int32_t z_scaled = (int32_t) math_asr_s64(z_product + rounding, shift);
+ const int32_t w_scaled = (int32_t) math_asr_s64(w_product + rounding, shift);
// Clamp scaled value with zero point between (qmin - zero point) and (qmax - zero point).
const int32_t x_clamped = math_min_s32(math_max_s32(x_scaled, smin), smax);
diff --git a/src/qs8-vadd/gen/minmax-scalar-x1.c b/src/qs8-vadd/gen/minmax-scalar-x1.c
index e390b7ae4..7cd7b2c7a 100644
--- a/src/qs8-vadd/gen/minmax-scalar-x1.c
+++ b/src/qs8-vadd/gen/minmax-scalar-x1.c
@@ -33,7 +33,7 @@ void xnn_qs8_vadd_minmax_ukernel__scalar_x1(
const int32_t vb = *input_b++;
const int32_t vacc = vbias + va * va_multiplier + vb * vb_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (int8_t) (vout + voutput_zero_point);
diff --git a/src/qs8-vadd/gen/minmax-scalar-x2.c b/src/qs8-vadd/gen/minmax-scalar-x2.c
index bb55d42b3..fac2cee4c 100644
--- a/src/qs8-vadd/gen/minmax-scalar-x2.c
+++ b/src/qs8-vadd/gen/minmax-scalar-x2.c
@@ -42,8 +42,8 @@ void xnn_qs8_vadd_minmax_ukernel__scalar_x2(
vacc0 += vb0 * vb_multiplier;
vacc1 += vb1 * vb_multiplier;
- int32_t vout0 = asr_s32(vacc0, vshift);
- int32_t vout1 = asr_s32(vacc1, vshift);
+ int32_t vout0 = math_asr_s32(vacc0, vshift);
+ int32_t vout1 = math_asr_s32(vacc1, vshift);
vout0 = math_max_s32(vout0, voutput_min_less_zero_point);
vout1 = math_max_s32(vout1, voutput_min_less_zero_point);
@@ -63,7 +63,7 @@ void xnn_qs8_vadd_minmax_ukernel__scalar_x2(
const int32_t vb = *input_b;
const int32_t vacc = vbias + va * va_multiplier + vb * vb_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (int8_t) (vout + voutput_zero_point);
diff --git a/src/qs8-vadd/gen/minmax-scalar-x4.c b/src/qs8-vadd/gen/minmax-scalar-x4.c
index cb3e69590..dc613a7bb 100644
--- a/src/qs8-vadd/gen/minmax-scalar-x4.c
+++ b/src/qs8-vadd/gen/minmax-scalar-x4.c
@@ -50,10 +50,10 @@ void xnn_qs8_vadd_minmax_ukernel__scalar_x4(
vacc2 += vb2 * vb_multiplier;
vacc3 += vb3 * vb_multiplier;
- int32_t vout0 = asr_s32(vacc0, vshift);
- int32_t vout1 = asr_s32(vacc1, vshift);
- int32_t vout2 = asr_s32(vacc2, vshift);
- int32_t vout3 = asr_s32(vacc3, vshift);
+ int32_t vout0 = math_asr_s32(vacc0, vshift);
+ int32_t vout1 = math_asr_s32(vacc1, vshift);
+ int32_t vout2 = math_asr_s32(vacc2, vshift);
+ int32_t vout3 = math_asr_s32(vacc3, vshift);
vout0 = math_max_s32(vout0, voutput_min_less_zero_point);
vout1 = math_max_s32(vout1, voutput_min_less_zero_point);
@@ -82,7 +82,7 @@ void xnn_qs8_vadd_minmax_ukernel__scalar_x4(
const int32_t vb = *input_b++;
const int32_t vacc = vbias + va * va_multiplier + vb * vb_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (int8_t) (vout + voutput_zero_point);
diff --git a/src/qs8-vadd/scalar.c.in b/src/qs8-vadd/scalar.c.in
index 65ff4aa77..3cd4b8dcd 100644
--- a/src/qs8-vadd/scalar.c.in
+++ b/src/qs8-vadd/scalar.c.in
@@ -33,7 +33,7 @@ void xnn_${DATATYPE.lower()}_vadd_minmax_ukernel__scalar_x${BATCH_TILE}(
const int32_t vb = *input_b++;
const int32_t vacc = vbias + va * va_multiplier + vb * vb_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (${XINT8_T}) (vout + voutput_zero_point);
@@ -55,7 +55,7 @@ void xnn_${DATATYPE.lower()}_vadd_minmax_ukernel__scalar_x${BATCH_TILE}(
vacc${N} += vb${N} * vb_multiplier;
$for N in range(BATCH_TILE):
- int32_t vout${N} = asr_s32(vacc${N}, vshift);
+ int32_t vout${N} = math_asr_s32(vacc${N}, vshift);
$for N in range(BATCH_TILE):
vout${N} = math_max_s32(vout${N}, voutput_min_less_zero_point);
@@ -76,7 +76,7 @@ void xnn_${DATATYPE.lower()}_vadd_minmax_ukernel__scalar_x${BATCH_TILE}(
const int32_t vb = *input_b;
const int32_t vacc = vbias + va * va_multiplier + vb * vb_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (${XINT8_T}) (vout + voutput_zero_point);
@@ -86,7 +86,7 @@ void xnn_${DATATYPE.lower()}_vadd_minmax_ukernel__scalar_x${BATCH_TILE}(
const int32_t vb = *input_b++;
const int32_t vacc = vbias + va * va_multiplier + vb * vb_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (${XINT8_T}) (vout + voutput_zero_point);
diff --git a/src/qs8-vaddc/gen/minmax-scalar-x1.c b/src/qs8-vaddc/gen/minmax-scalar-x1.c
index f7dc69be4..34597dae2 100644
--- a/src/qs8-vaddc/gen/minmax-scalar-x1.c
+++ b/src/qs8-vaddc/gen/minmax-scalar-x1.c
@@ -31,7 +31,7 @@ void xnn_qs8_vaddc_minmax_ukernel__scalar_x1(
const int32_t va = *input_a++;
const int32_t vacc = vbias + va * va_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (int8_t) (vout + voutput_zero_point);
diff --git a/src/qs8-vaddc/gen/minmax-scalar-x2.c b/src/qs8-vaddc/gen/minmax-scalar-x2.c
index 75b733a8e..cfc8f8ff6 100644
--- a/src/qs8-vaddc/gen/minmax-scalar-x2.c
+++ b/src/qs8-vaddc/gen/minmax-scalar-x2.c
@@ -36,8 +36,8 @@ void xnn_qs8_vaddc_minmax_ukernel__scalar_x2(
const int32_t vacc1 = vbias + va1 * va_multiplier;
input_b += 2;
- int32_t vout0 = asr_s32(vacc0, vshift);
- int32_t vout1 = asr_s32(vacc1, vshift);
+ int32_t vout0 = math_asr_s32(vacc0, vshift);
+ int32_t vout1 = math_asr_s32(vacc1, vshift);
vout0 = math_max_s32(vout0, voutput_min_less_zero_point);
vout1 = math_max_s32(vout1, voutput_min_less_zero_point);
@@ -56,7 +56,7 @@ void xnn_qs8_vaddc_minmax_ukernel__scalar_x2(
const int32_t va = *input_a;
const int32_t vacc = vbias + va * va_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (int8_t) (vout + voutput_zero_point);
diff --git a/src/qs8-vaddc/gen/minmax-scalar-x4.c b/src/qs8-vaddc/gen/minmax-scalar-x4.c
index ee15c3d10..ed1124561 100644
--- a/src/qs8-vaddc/gen/minmax-scalar-x4.c
+++ b/src/qs8-vaddc/gen/minmax-scalar-x4.c
@@ -40,10 +40,10 @@ void xnn_qs8_vaddc_minmax_ukernel__scalar_x4(
const int32_t vacc3 = vbias + va3 * va_multiplier;
input_b += 4;
- int32_t vout0 = asr_s32(vacc0, vshift);
- int32_t vout1 = asr_s32(vacc1, vshift);
- int32_t vout2 = asr_s32(vacc2, vshift);
- int32_t vout3 = asr_s32(vacc3, vshift);
+ int32_t vout0 = math_asr_s32(vacc0, vshift);
+ int32_t vout1 = math_asr_s32(vacc1, vshift);
+ int32_t vout2 = math_asr_s32(vacc2, vshift);
+ int32_t vout3 = math_asr_s32(vacc3, vshift);
vout0 = math_max_s32(vout0, voutput_min_less_zero_point);
vout1 = math_max_s32(vout1, voutput_min_less_zero_point);
@@ -71,7 +71,7 @@ void xnn_qs8_vaddc_minmax_ukernel__scalar_x4(
const int32_t va = *input_a++;
const int32_t vacc = vbias + va * va_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (int8_t) (vout + voutput_zero_point);
diff --git a/src/qs8-vaddc/scalar.c.in b/src/qs8-vaddc/scalar.c.in
index 61109b508..3616ad3c1 100644
--- a/src/qs8-vaddc/scalar.c.in
+++ b/src/qs8-vaddc/scalar.c.in
@@ -31,7 +31,7 @@ void xnn_${DATATYPE.lower()}_vaddc_minmax_ukernel__scalar_x${BATCH_TILE}(
const int32_t va = *input_a++;
const int32_t vacc = vbias + va * va_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (${XINT8_T}) (vout + voutput_zero_point);
@@ -49,7 +49,7 @@ void xnn_${DATATYPE.lower()}_vaddc_minmax_ukernel__scalar_x${BATCH_TILE}(
input_b += ${BATCH_TILE};
$for N in range(BATCH_TILE):
- int32_t vout${N} = asr_s32(vacc${N}, vshift);
+ int32_t vout${N} = math_asr_s32(vacc${N}, vshift);
$for N in range(BATCH_TILE):
vout${N} = math_max_s32(vout${N}, voutput_min_less_zero_point);
@@ -69,7 +69,7 @@ void xnn_${DATATYPE.lower()}_vaddc_minmax_ukernel__scalar_x${BATCH_TILE}(
const int32_t va = *input_a;
const int32_t vacc = vbias + va * va_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (${XINT8_T}) (vout + voutput_zero_point);
@@ -78,7 +78,7 @@ void xnn_${DATATYPE.lower()}_vaddc_minmax_ukernel__scalar_x${BATCH_TILE}(
const int32_t va = *input_a++;
const int32_t vacc = vbias + va * va_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (${XINT8_T}) (vout + voutput_zero_point);
diff --git a/src/qs8-vcvt/armv6simd.c.in b/src/qs8-vcvt/armv6simd.c.in
index 4dd21bee7..ebfffae98 100644
--- a/src/qs8-vcvt/armv6simd.c.in
+++ b/src/qs8-vcvt/armv6simd.c.in
@@ -48,7 +48,7 @@ void xnn_${DATATYPE.lower()}_vcvt_ukernel__armv6simd_x${BATCH_TILE}(
int32_t vacc${ABC[N+3]} = __smlawt(vmultiplier, vx${ABC[N+1]}${ABC[N+3]}, vbias);
$for N in range(BATCH_TILE):
- vacc${ABC[N]} = ${__XSAT}(asr_s32(vacc${ABC[N]}, 1), 8);
+ vacc${ABC[N]} = ${__XSAT}(math_asr_s32(vacc${ABC[N]}, 1), 8);
$for N in range(BATCH_TILE):
y[${N}] = (${XINT8_T}) vacc${ABC[N]};
@@ -66,10 +66,10 @@ void xnn_${DATATYPE.lower()}_vcvt_ukernel__armv6simd_x${BATCH_TILE}(
int32_t vacc2 = __smlawt(vmultiplier, vx02, vbias);
int32_t vacc3 = __smlawt(vmultiplier, vx13, vbias);
- vacc0 = ${__XSAT}(asr_s32(vacc0, 1), 8);
- vacc1 = ${__XSAT}(asr_s32(vacc1, 1), 8);
- vacc2 = ${__XSAT}(asr_s32(vacc2, 1), 8);
- vacc3 = ${__XSAT}(asr_s32(vacc3, 1), 8);
+ vacc0 = ${__XSAT}(math_asr_s32(vacc0, 1), 8);
+ vacc1 = ${__XSAT}(math_asr_s32(vacc1, 1), 8);
+ vacc2 = ${__XSAT}(math_asr_s32(vacc2, 1), 8);
+ vacc3 = ${__XSAT}(math_asr_s32(vacc3, 1), 8);
y[0] = (${XINT8_T}) vacc0;
y[1] = (${XINT8_T}) vacc1;
@@ -87,13 +87,13 @@ void xnn_${DATATYPE.lower()}_vcvt_ukernel__armv6simd_x${BATCH_TILE}(
int32_t vacc1 = __smlawb(vmultiplier, vx13, vbias);
const int32_t vacc2 = __smlawt(vmultiplier, vx02, vbias);
- vacc0 = ${__XSAT}(asr_s32(vacc0, 1), 8);
- vacc1 = ${__XSAT}(asr_s32(vacc1, 1), 8);
+ vacc0 = ${__XSAT}(math_asr_s32(vacc0, 1), 8);
+ vacc1 = ${__XSAT}(math_asr_s32(vacc1, 1), 8);
if (n & (2 * sizeof(${XINT8_T}))) {
y[0] = (${XINT8_T}) vacc0;
y[1] = (${XINT8_T}) vacc1;
- vacc0 = ${__XSAT}(asr_s32(vacc2, 1), 8);
+ vacc0 = ${__XSAT}(math_asr_s32(vacc2, 1), 8);
y += 2;
}
if (n & (1 * sizeof(${XINT8_T}))) {
diff --git a/src/qs8-vcvt/gen/vcvt-armv6simd-x4.c b/src/qs8-vcvt/gen/vcvt-armv6simd-x4.c
index a7eaee4e4..ea76f2893 100644
--- a/src/qs8-vcvt/gen/vcvt-armv6simd-x4.c
+++ b/src/qs8-vcvt/gen/vcvt-armv6simd-x4.c
@@ -38,10 +38,10 @@ void xnn_qs8_vcvt_ukernel__armv6simd_x4(
int32_t vacc2 = __smlawt(vmultiplier, vx02, vbias);
int32_t vacc3 = __smlawt(vmultiplier, vx13, vbias);
- vacc0 = __ssat(asr_s32(vacc0, 1), 8);
- vacc1 = __ssat(asr_s32(vacc1, 1), 8);
- vacc2 = __ssat(asr_s32(vacc2, 1), 8);
- vacc3 = __ssat(asr_s32(vacc3, 1), 8);
+ vacc0 = __ssat(math_asr_s32(vacc0, 1), 8);
+ vacc1 = __ssat(math_asr_s32(vacc1, 1), 8);
+ vacc2 = __ssat(math_asr_s32(vacc2, 1), 8);
+ vacc3 = __ssat(math_asr_s32(vacc3, 1), 8);
y[0] = (int8_t) vacc0;
y[1] = (int8_t) vacc1;
@@ -59,13 +59,13 @@ void xnn_qs8_vcvt_ukernel__armv6simd_x4(
int32_t vacc1 = __smlawb(vmultiplier, vx13, vbias);
const int32_t vacc2 = __smlawt(vmultiplier, vx02, vbias);
- vacc0 = __ssat(asr_s32(vacc0, 1), 8);
- vacc1 = __ssat(asr_s32(vacc1, 1), 8);
+ vacc0 = __ssat(math_asr_s32(vacc0, 1), 8);
+ vacc1 = __ssat(math_asr_s32(vacc1, 1), 8);
if (n & (2 * sizeof(int8_t))) {
y[0] = (int8_t) vacc0;
y[1] = (int8_t) vacc1;
- vacc0 = __ssat(asr_s32(vacc2, 1), 8);
+ vacc0 = __ssat(math_asr_s32(vacc2, 1), 8);
y += 2;
}
if (n & (1 * sizeof(int8_t))) {
diff --git a/src/qs8-vcvt/gen/vcvt-armv6simd-x8.c b/src/qs8-vcvt/gen/vcvt-armv6simd-x8.c
index 9137dc452..9b3073492 100644
--- a/src/qs8-vcvt/gen/vcvt-armv6simd-x8.c
+++ b/src/qs8-vcvt/gen/vcvt-armv6simd-x8.c
@@ -45,14 +45,14 @@ void xnn_qs8_vcvt_ukernel__armv6simd_x8(
int32_t vacc6 = __smlawt(vmultiplier, vx46, vbias);
int32_t vacc7 = __smlawt(vmultiplier, vx57, vbias);
- vacc0 = __ssat(asr_s32(vacc0, 1), 8);
- vacc1 = __ssat(asr_s32(vacc1, 1), 8);
- vacc2 = __ssat(asr_s32(vacc2, 1), 8);
- vacc3 = __ssat(asr_s32(vacc3, 1), 8);
- vacc4 = __ssat(asr_s32(vacc4, 1), 8);
- vacc5 = __ssat(asr_s32(vacc5, 1), 8);
- vacc6 = __ssat(asr_s32(vacc6, 1), 8);
- vacc7 = __ssat(asr_s32(vacc7, 1), 8);
+ vacc0 = __ssat(math_asr_s32(vacc0, 1), 8);
+ vacc1 = __ssat(math_asr_s32(vacc1, 1), 8);
+ vacc2 = __ssat(math_asr_s32(vacc2, 1), 8);
+ vacc3 = __ssat(math_asr_s32(vacc3, 1), 8);
+ vacc4 = __ssat(math_asr_s32(vacc4, 1), 8);
+ vacc5 = __ssat(math_asr_s32(vacc5, 1), 8);
+ vacc6 = __ssat(math_asr_s32(vacc6, 1), 8);
+ vacc7 = __ssat(math_asr_s32(vacc7, 1), 8);
y[0] = (int8_t) vacc0;
y[1] = (int8_t) vacc1;
@@ -76,10 +76,10 @@ void xnn_qs8_vcvt_ukernel__armv6simd_x8(
int32_t vacc2 = __smlawt(vmultiplier, vx02, vbias);
int32_t vacc3 = __smlawt(vmultiplier, vx13, vbias);
- vacc0 = __ssat(asr_s32(vacc0, 1), 8);
- vacc1 = __ssat(asr_s32(vacc1, 1), 8);
- vacc2 = __ssat(asr_s32(vacc2, 1), 8);
- vacc3 = __ssat(asr_s32(vacc3, 1), 8);
+ vacc0 = __ssat(math_asr_s32(vacc0, 1), 8);
+ vacc1 = __ssat(math_asr_s32(vacc1, 1), 8);
+ vacc2 = __ssat(math_asr_s32(vacc2, 1), 8);
+ vacc3 = __ssat(math_asr_s32(vacc3, 1), 8);
y[0] = (int8_t) vacc0;
y[1] = (int8_t) vacc1;
@@ -97,13 +97,13 @@ void xnn_qs8_vcvt_ukernel__armv6simd_x8(
int32_t vacc1 = __smlawb(vmultiplier, vx13, vbias);
const int32_t vacc2 = __smlawt(vmultiplier, vx02, vbias);
- vacc0 = __ssat(asr_s32(vacc0, 1), 8);
- vacc1 = __ssat(asr_s32(vacc1, 1), 8);
+ vacc0 = __ssat(math_asr_s32(vacc0, 1), 8);
+ vacc1 = __ssat(math_asr_s32(vacc1, 1), 8);
if (n & (2 * sizeof(int8_t))) {
y[0] = (int8_t) vacc0;
y[1] = (int8_t) vacc1;
- vacc0 = __ssat(asr_s32(vacc2, 1), 8);
+ vacc0 = __ssat(math_asr_s32(vacc2, 1), 8);
y += 2;
}
if (n & (1 * sizeof(int8_t))) {
diff --git a/src/qs8-vcvt/gen/vcvt-scalar-x1.c b/src/qs8-vcvt/gen/vcvt-scalar-x1.c
index 211188be4..9424bb751 100644
--- a/src/qs8-vcvt/gen/vcvt-scalar-x1.c
+++ b/src/qs8-vcvt/gen/vcvt-scalar-x1.c
@@ -25,7 +25,7 @@ void xnn_qs8_vcvt_ukernel__scalar_x1(
int32_t vacc = *x++;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, -128);
vout = math_min_s32(vout, 127);
*y++ = (int8_t) vout;
diff --git a/src/qs8-vcvt/gen/vcvt-scalar-x2.c b/src/qs8-vcvt/gen/vcvt-scalar-x2.c
index 93b0327d5..5501229c0 100644
--- a/src/qs8-vcvt/gen/vcvt-scalar-x2.c
+++ b/src/qs8-vcvt/gen/vcvt-scalar-x2.c
@@ -29,8 +29,8 @@ void xnn_qs8_vcvt_ukernel__scalar_x2(
vacc0 = vbias + vacc0 * vmultiplier;
vacc1 = vbias + vacc1 * vmultiplier;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
vout0 = math_max_s32(vout0, -128);
vout1 = math_max_s32(vout1, -128);
@@ -46,7 +46,7 @@ void xnn_qs8_vcvt_ukernel__scalar_x2(
int32_t vacc = *x;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, -128);
vout = math_min_s32(vout, 127);
*y = (int8_t) vout;
diff --git a/src/qs8-vcvt/gen/vcvt-scalar-x4.c b/src/qs8-vcvt/gen/vcvt-scalar-x4.c
index db786c92b..44c2ff627 100644
--- a/src/qs8-vcvt/gen/vcvt-scalar-x4.c
+++ b/src/qs8-vcvt/gen/vcvt-scalar-x4.c
@@ -33,10 +33,10 @@ void xnn_qs8_vcvt_ukernel__scalar_x4(
vacc2 = vbias + vacc2 * vmultiplier;
vacc3 = vbias + vacc3 * vmultiplier;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
- int32_t vout2 = asr_s32(vacc2, 8);
- int32_t vout3 = asr_s32(vacc3, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
+ int32_t vout2 = math_asr_s32(vacc2, 8);
+ int32_t vout3 = math_asr_s32(vacc3, 8);
vout0 = math_max_s32(vout0, -128);
vout1 = math_max_s32(vout1, -128);
@@ -59,7 +59,7 @@ void xnn_qs8_vcvt_ukernel__scalar_x4(
int32_t vacc = *x++;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, -128);
vout = math_min_s32(vout, 127);
*y++ = (int8_t) vout;
diff --git a/src/qs8-vcvt/scalar.c.in b/src/qs8-vcvt/scalar.c.in
index 23270f573..284876e8c 100644
--- a/src/qs8-vcvt/scalar.c.in
+++ b/src/qs8-vcvt/scalar.c.in
@@ -28,7 +28,7 @@ void xnn_${DATATYPE.lower()}_vcvt_ukernel__scalar_x${BATCH_TILE}(
int32_t vacc = *x++;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, ${OUTPUT_MIN});
vout = math_min_s32(vout, ${OUTPUT_MAX});
*y++ = (${XINT8_T}) vout;
@@ -45,7 +45,7 @@ void xnn_${DATATYPE.lower()}_vcvt_ukernel__scalar_x${BATCH_TILE}(
vacc${ABC[N]} = vbias + vacc${ABC[N]} * vmultiplier;
$for N in range(BATCH_TILE):
- int32_t vout${ABC[N]} = asr_s32(vacc${ABC[N]}, 8);
+ int32_t vout${ABC[N]} = math_asr_s32(vacc${ABC[N]}, 8);
$for N in range(BATCH_TILE):
vout${ABC[N]} = math_max_s32(vout${ABC[N]}, ${OUTPUT_MIN});
@@ -62,7 +62,7 @@ void xnn_${DATATYPE.lower()}_vcvt_ukernel__scalar_x${BATCH_TILE}(
int32_t vacc = *x;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, ${OUTPUT_MIN});
vout = math_min_s32(vout, ${OUTPUT_MAX});
*y = (${XINT8_T}) vout;
@@ -71,7 +71,7 @@ void xnn_${DATATYPE.lower()}_vcvt_ukernel__scalar_x${BATCH_TILE}(
int32_t vacc = *x++;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, ${OUTPUT_MIN});
vout = math_min_s32(vout, ${OUTPUT_MAX});
*y++ = (${XINT8_T}) vout;
diff --git a/src/qs8-vlrelu/armv6simd.c.in b/src/qs8-vlrelu/armv6simd.c.in
index d9f3d07ac..302e0c3fc 100644
--- a/src/qs8-vlrelu/armv6simd.c.in
+++ b/src/qs8-vlrelu/armv6simd.c.in
@@ -56,7 +56,7 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__armv6simd_x${BATCH_TILE}(
int32_t vacc${ABC[N+3]} = __smlatt(vmultiplier${ABC[N+1]}${ABC[N+3]}, vx${ABC[N+1]}${ABC[N+3]}, vbias);
$for N in range(BATCH_TILE):
- vacc${ABC[N]} = ${__XSAT}(asr_s32(vacc${ABC[N]}, 8), 8);
+ vacc${ABC[N]} = ${__XSAT}(math_asr_s32(vacc${ABC[N]}, 8), 8);
$for N in range(BATCH_TILE):
y[${N}] = (${XINT8_T}) vacc${ABC[N]};
@@ -79,10 +79,10 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__armv6simd_x${BATCH_TILE}(
int32_t vacc2 = __smlatt(vmultiplier02, vx02, vbias);
int32_t vacc3 = __smlatt(vmultiplier13, vx13, vbias);
- vacc0 = ${__XSAT}(asr_s32(vacc0, 8), 8);
- vacc1 = ${__XSAT}(asr_s32(vacc1, 8), 8);
- vacc2 = ${__XSAT}(asr_s32(vacc2, 8), 8);
- vacc3 = ${__XSAT}(asr_s32(vacc3, 8), 8);
+ vacc0 = ${__XSAT}(math_asr_s32(vacc0, 8), 8);
+ vacc1 = ${__XSAT}(math_asr_s32(vacc1, 8), 8);
+ vacc2 = ${__XSAT}(math_asr_s32(vacc2, 8), 8);
+ vacc3 = ${__XSAT}(math_asr_s32(vacc3, 8), 8);
y[0] = (${XINT8_T}) vacc0;
y[1] = (${XINT8_T}) vacc1;
@@ -105,13 +105,13 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__armv6simd_x${BATCH_TILE}(
int32_t vacc1 = __smlabb(vmultiplier13, vx13, vbias);
const int32_t vacc2 = __smlatt(vmultiplier02, vx02, vbias);
- vacc0 = ${__XSAT}(asr_s32(vacc0, 8), 8);
- vacc1 = ${__XSAT}(asr_s32(vacc1, 8), 8);
+ vacc0 = ${__XSAT}(math_asr_s32(vacc0, 8), 8);
+ vacc1 = ${__XSAT}(math_asr_s32(vacc1, 8), 8);
if (n & (2 * sizeof(${XINT8_T}))) {
y[0] = (${XINT8_T}) vacc0;
y[1] = (${XINT8_T}) vacc1;
- vacc0 = ${__XSAT}(asr_s32(vacc2, 8), 8);
+ vacc0 = ${__XSAT}(math_asr_s32(vacc2, 8), 8);
y += 2;
}
if (n & (1 * sizeof(${XINT8_T}))) {
diff --git a/src/qs8-vlrelu/gen/vlrelu-armv6simd-x4.c b/src/qs8-vlrelu/gen/vlrelu-armv6simd-x4.c
index d99ad2e45..aa298b9b7 100644
--- a/src/qs8-vlrelu/gen/vlrelu-armv6simd-x4.c
+++ b/src/qs8-vlrelu/gen/vlrelu-armv6simd-x4.c
@@ -44,10 +44,10 @@ void xnn_qs8_vlrelu_ukernel__armv6simd_x4(
int32_t vacc2 = __smlatt(vmultiplier02, vx02, vbias);
int32_t vacc3 = __smlatt(vmultiplier13, vx13, vbias);
- vacc0 = __ssat(asr_s32(vacc0, 8), 8);
- vacc1 = __ssat(asr_s32(vacc1, 8), 8);
- vacc2 = __ssat(asr_s32(vacc2, 8), 8);
- vacc3 = __ssat(asr_s32(vacc3, 8), 8);
+ vacc0 = __ssat(math_asr_s32(vacc0, 8), 8);
+ vacc1 = __ssat(math_asr_s32(vacc1, 8), 8);
+ vacc2 = __ssat(math_asr_s32(vacc2, 8), 8);
+ vacc3 = __ssat(math_asr_s32(vacc3, 8), 8);
y[0] = (int8_t) vacc0;
y[1] = (int8_t) vacc1;
@@ -70,13 +70,13 @@ void xnn_qs8_vlrelu_ukernel__armv6simd_x4(
int32_t vacc1 = __smlabb(vmultiplier13, vx13, vbias);
const int32_t vacc2 = __smlatt(vmultiplier02, vx02, vbias);
- vacc0 = __ssat(asr_s32(vacc0, 8), 8);
- vacc1 = __ssat(asr_s32(vacc1, 8), 8);
+ vacc0 = __ssat(math_asr_s32(vacc0, 8), 8);
+ vacc1 = __ssat(math_asr_s32(vacc1, 8), 8);
if (n & (2 * sizeof(int8_t))) {
y[0] = (int8_t) vacc0;
y[1] = (int8_t) vacc1;
- vacc0 = __ssat(asr_s32(vacc2, 8), 8);
+ vacc0 = __ssat(math_asr_s32(vacc2, 8), 8);
y += 2;
}
if (n & (1 * sizeof(int8_t))) {
diff --git a/src/qs8-vlrelu/gen/vlrelu-armv6simd-x8.c b/src/qs8-vlrelu/gen/vlrelu-armv6simd-x8.c
index 1d4e233ac..053e92ace 100644
--- a/src/qs8-vlrelu/gen/vlrelu-armv6simd-x8.c
+++ b/src/qs8-vlrelu/gen/vlrelu-armv6simd-x8.c
@@ -55,14 +55,14 @@ void xnn_qs8_vlrelu_ukernel__armv6simd_x8(
int32_t vacc6 = __smlatt(vmultiplier46, vx46, vbias);
int32_t vacc7 = __smlatt(vmultiplier57, vx57, vbias);
- vacc0 = __ssat(asr_s32(vacc0, 8), 8);
- vacc1 = __ssat(asr_s32(vacc1, 8), 8);
- vacc2 = __ssat(asr_s32(vacc2, 8), 8);
- vacc3 = __ssat(asr_s32(vacc3, 8), 8);
- vacc4 = __ssat(asr_s32(vacc4, 8), 8);
- vacc5 = __ssat(asr_s32(vacc5, 8), 8);
- vacc6 = __ssat(asr_s32(vacc6, 8), 8);
- vacc7 = __ssat(asr_s32(vacc7, 8), 8);
+ vacc0 = __ssat(math_asr_s32(vacc0, 8), 8);
+ vacc1 = __ssat(math_asr_s32(vacc1, 8), 8);
+ vacc2 = __ssat(math_asr_s32(vacc2, 8), 8);
+ vacc3 = __ssat(math_asr_s32(vacc3, 8), 8);
+ vacc4 = __ssat(math_asr_s32(vacc4, 8), 8);
+ vacc5 = __ssat(math_asr_s32(vacc5, 8), 8);
+ vacc6 = __ssat(math_asr_s32(vacc6, 8), 8);
+ vacc7 = __ssat(math_asr_s32(vacc7, 8), 8);
y[0] = (int8_t) vacc0;
y[1] = (int8_t) vacc1;
@@ -91,10 +91,10 @@ void xnn_qs8_vlrelu_ukernel__armv6simd_x8(
int32_t vacc2 = __smlatt(vmultiplier02, vx02, vbias);
int32_t vacc3 = __smlatt(vmultiplier13, vx13, vbias);
- vacc0 = __ssat(asr_s32(vacc0, 8), 8);
- vacc1 = __ssat(asr_s32(vacc1, 8), 8);
- vacc2 = __ssat(asr_s32(vacc2, 8), 8);
- vacc3 = __ssat(asr_s32(vacc3, 8), 8);
+ vacc0 = __ssat(math_asr_s32(vacc0, 8), 8);
+ vacc1 = __ssat(math_asr_s32(vacc1, 8), 8);
+ vacc2 = __ssat(math_asr_s32(vacc2, 8), 8);
+ vacc3 = __ssat(math_asr_s32(vacc3, 8), 8);
y[0] = (int8_t) vacc0;
y[1] = (int8_t) vacc1;
@@ -117,13 +117,13 @@ void xnn_qs8_vlrelu_ukernel__armv6simd_x8(
int32_t vacc1 = __smlabb(vmultiplier13, vx13, vbias);
const int32_t vacc2 = __smlatt(vmultiplier02, vx02, vbias);
- vacc0 = __ssat(asr_s32(vacc0, 8), 8);
- vacc1 = __ssat(asr_s32(vacc1, 8), 8);
+ vacc0 = __ssat(math_asr_s32(vacc0, 8), 8);
+ vacc1 = __ssat(math_asr_s32(vacc1, 8), 8);
if (n & (2 * sizeof(int8_t))) {
y[0] = (int8_t) vacc0;
y[1] = (int8_t) vacc1;
- vacc0 = __ssat(asr_s32(vacc2, 8), 8);
+ vacc0 = __ssat(math_asr_s32(vacc2, 8), 8);
y += 2;
}
if (n & (1 * sizeof(int8_t))) {
diff --git a/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x1.c b/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x1.c
index e5d8fd059..bd790691a 100644
--- a/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x1.c
+++ b/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x1.c
@@ -25,10 +25,10 @@ void xnn_qs8_vlrelu_ukernel__scalar_andxor_x1(
const int32_t vbias = params->scalar_andxor.bias;
do {
int32_t vacc = (int32_t) *x++ - vinput_zero_point;
- const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & asr_s32(vacc, 31));
+ const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & math_asr_s32(vacc, 31));
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, -128);
vout = math_min_s32(vout, 127);
*y++ = (int8_t) vout;
diff --git a/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x2.c b/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x2.c
index 5d52c2724..7e1f97862 100644
--- a/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x2.c
+++ b/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x2.c
@@ -31,8 +31,8 @@ void xnn_qs8_vlrelu_ukernel__scalar_andxor_x2(
vacc0 -= vinput_zero_point;
vacc1 -= vinput_zero_point;
- int32_t vmultiplier0 = asr_s32(vacc0, 31);
- int32_t vmultiplier1 = asr_s32(vacc1, 31);
+ int32_t vmultiplier0 = math_asr_s32(vacc0, 31);
+ int32_t vmultiplier1 = math_asr_s32(vacc1, 31);
vmultiplier0 &= vmultiplier_diff;
vmultiplier1 &= vmultiplier_diff;
@@ -43,8 +43,8 @@ void xnn_qs8_vlrelu_ukernel__scalar_andxor_x2(
vacc0 = vbias + vacc0 * vmultiplier0;
vacc1 = vbias + vacc1 * vmultiplier1;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
vout0 = math_max_s32(vout0, -128);
vout1 = math_max_s32(vout1, -128);
@@ -58,10 +58,10 @@ void xnn_qs8_vlrelu_ukernel__scalar_andxor_x2(
}
if XNN_UNLIKELY(n != 0) {
int32_t vacc = (int32_t) *x++ - vinput_zero_point;
- const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & asr_s32(vacc, 31));
+ const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & math_asr_s32(vacc, 31));
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, -128);
vout = math_min_s32(vout, 127);
*y = (int8_t) vout;
diff --git a/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x4.c b/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x4.c
index 00043810f..159482303 100644
--- a/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x4.c
+++ b/src/qs8-vlrelu/gen/vlrelu-scalar-andxor-x4.c
@@ -35,10 +35,10 @@ void xnn_qs8_vlrelu_ukernel__scalar_andxor_x4(
vacc2 -= vinput_zero_point;
vacc3 -= vinput_zero_point;
- int32_t vmultiplier0 = asr_s32(vacc0, 31);
- int32_t vmultiplier1 = asr_s32(vacc1, 31);
- int32_t vmultiplier2 = asr_s32(vacc2, 31);
- int32_t vmultiplier3 = asr_s32(vacc3, 31);
+ int32_t vmultiplier0 = math_asr_s32(vacc0, 31);
+ int32_t vmultiplier1 = math_asr_s32(vacc1, 31);
+ int32_t vmultiplier2 = math_asr_s32(vacc2, 31);
+ int32_t vmultiplier3 = math_asr_s32(vacc3, 31);
vmultiplier0 &= vmultiplier_diff;
vmultiplier1 &= vmultiplier_diff;
@@ -55,10 +55,10 @@ void xnn_qs8_vlrelu_ukernel__scalar_andxor_x4(
vacc2 = vbias + vacc2 * vmultiplier2;
vacc3 = vbias + vacc3 * vmultiplier3;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
- int32_t vout2 = asr_s32(vacc2, 8);
- int32_t vout3 = asr_s32(vacc3, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
+ int32_t vout2 = math_asr_s32(vacc2, 8);
+ int32_t vout3 = math_asr_s32(vacc3, 8);
vout0 = math_max_s32(vout0, -128);
vout1 = math_max_s32(vout1, -128);
@@ -79,10 +79,10 @@ void xnn_qs8_vlrelu_ukernel__scalar_andxor_x4(
if XNN_UNLIKELY(n != 0) {
do {
int32_t vacc = (int32_t) *x++ - vinput_zero_point;
- const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & asr_s32(vacc, 31));
+ const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & math_asr_s32(vacc, 31));
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, -128);
vout = math_min_s32(vout, 127);
*y++ = (int8_t) vout;
diff --git a/src/qs8-vlrelu/gen/vlrelu-scalar-select-x1.c b/src/qs8-vlrelu/gen/vlrelu-scalar-select-x1.c
index 59f41c68e..ed0ad7235 100644
--- a/src/qs8-vlrelu/gen/vlrelu-scalar-select-x1.c
+++ b/src/qs8-vlrelu/gen/vlrelu-scalar-select-x1.c
@@ -28,7 +28,7 @@ void xnn_qs8_vlrelu_ukernel__scalar_select_x1(
const int32_t vmultiplier = XNN_UNPREDICTABLE(vacc >= 0) ? vpositive_multiplier : vnegative_multiplier;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, -128);
vout = math_min_s32(vout, 127);
*y++ = (int8_t) vout;
diff --git a/src/qs8-vlrelu/gen/vlrelu-scalar-select-x2.c b/src/qs8-vlrelu/gen/vlrelu-scalar-select-x2.c
index ffac12f0a..9c9925de0 100644
--- a/src/qs8-vlrelu/gen/vlrelu-scalar-select-x2.c
+++ b/src/qs8-vlrelu/gen/vlrelu-scalar-select-x2.c
@@ -37,8 +37,8 @@ void xnn_qs8_vlrelu_ukernel__scalar_select_x2(
vacc0 = vbias + vacc0 * vmultiplier0;
vacc1 = vbias + vacc1 * vmultiplier1;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
vout0 = math_max_s32(vout0, -128);
vout1 = math_max_s32(vout1, -128);
@@ -55,7 +55,7 @@ void xnn_qs8_vlrelu_ukernel__scalar_select_x2(
const int32_t vmultiplier = XNN_UNPREDICTABLE(vacc >= 0) ? vpositive_multiplier : vnegative_multiplier;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, -128);
vout = math_min_s32(vout, 127);
*y = (int8_t) vout;
diff --git a/src/qs8-vlrelu/gen/vlrelu-scalar-select-x4.c b/src/qs8-vlrelu/gen/vlrelu-scalar-select-x4.c
index 3e9d93412..480febd29 100644
--- a/src/qs8-vlrelu/gen/vlrelu-scalar-select-x4.c
+++ b/src/qs8-vlrelu/gen/vlrelu-scalar-select-x4.c
@@ -45,10 +45,10 @@ void xnn_qs8_vlrelu_ukernel__scalar_select_x4(
vacc2 = vbias + vacc2 * vmultiplier2;
vacc3 = vbias + vacc3 * vmultiplier3;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
- int32_t vout2 = asr_s32(vacc2, 8);
- int32_t vout3 = asr_s32(vacc3, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
+ int32_t vout2 = math_asr_s32(vacc2, 8);
+ int32_t vout3 = math_asr_s32(vacc3, 8);
vout0 = math_max_s32(vout0, -128);
vout1 = math_max_s32(vout1, -128);
@@ -72,7 +72,7 @@ void xnn_qs8_vlrelu_ukernel__scalar_select_x4(
const int32_t vmultiplier = XNN_UNPREDICTABLE(vacc >= 0) ? vpositive_multiplier : vnegative_multiplier;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, -128);
vout = math_min_s32(vout, 127);
*y++ = (int8_t) vout;
diff --git a/src/qs8-vlrelu/scalar-andxor.c.in b/src/qs8-vlrelu/scalar-andxor.c.in
index 36b396d86..54083d7b6 100644
--- a/src/qs8-vlrelu/scalar-andxor.c.in
+++ b/src/qs8-vlrelu/scalar-andxor.c.in
@@ -28,10 +28,10 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__scalar_andxor_x${BATCH_TILE}(
$if BATCH_TILE == 1:
do {
int32_t vacc = (int32_t) *x++ - vinput_zero_point;
- const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & asr_s32(vacc, 31));
+ const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & math_asr_s32(vacc, 31));
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, ${OUTPUT_MIN});
vout = math_min_s32(vout, ${OUTPUT_MAX});
*y++ = (${XINT8_T}) vout;
@@ -48,7 +48,7 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__scalar_andxor_x${BATCH_TILE}(
vacc${ABC[N]} -= vinput_zero_point;
$for N in range(BATCH_TILE):
- int32_t vmultiplier${ABC[N]} = asr_s32(vacc${ABC[N]}, 31);
+ int32_t vmultiplier${ABC[N]} = math_asr_s32(vacc${ABC[N]}, 31);
$for N in range(BATCH_TILE):
vmultiplier${ABC[N]} &= vmultiplier_diff;
@@ -60,7 +60,7 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__scalar_andxor_x${BATCH_TILE}(
vacc${ABC[N]} = vbias + vacc${ABC[N]} * vmultiplier${ABC[N]};
$for N in range(BATCH_TILE):
- int32_t vout${ABC[N]} = asr_s32(vacc${ABC[N]}, 8);
+ int32_t vout${ABC[N]} = math_asr_s32(vacc${ABC[N]}, 8);
$for N in range(BATCH_TILE):
vout${ABC[N]} = math_max_s32(vout${ABC[N]}, ${OUTPUT_MIN});
@@ -75,20 +75,20 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__scalar_andxor_x${BATCH_TILE}(
if XNN_UNLIKELY(n != 0) {
$if BATCH_TILE == 2:
int32_t vacc = (int32_t) *x++ - vinput_zero_point;
- const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & asr_s32(vacc, 31));
+ const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & math_asr_s32(vacc, 31));
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, ${OUTPUT_MIN});
vout = math_min_s32(vout, ${OUTPUT_MAX});
*y = (${XINT8_T}) vout;
$else:
do {
int32_t vacc = (int32_t) *x++ - vinput_zero_point;
- const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & asr_s32(vacc, 31));
+ const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & math_asr_s32(vacc, 31));
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, ${OUTPUT_MIN});
vout = math_min_s32(vout, ${OUTPUT_MAX});
*y++ = (${XINT8_T}) vout;
diff --git a/src/qs8-vlrelu/scalar-select.c.in b/src/qs8-vlrelu/scalar-select.c.in
index 83723566f..44dc0ada6 100644
--- a/src/qs8-vlrelu/scalar-select.c.in
+++ b/src/qs8-vlrelu/scalar-select.c.in
@@ -31,7 +31,7 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__scalar_select_x${BATCH_TILE}(
const int32_t vmultiplier = XNN_UNPREDICTABLE(vacc >= 0) ? vpositive_multiplier : vnegative_multiplier;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, ${OUTPUT_MIN});
vout = math_min_s32(vout, ${OUTPUT_MAX});
*y++ = (${XINT8_T}) vout;
@@ -54,7 +54,7 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__scalar_select_x${BATCH_TILE}(
vacc${ABC[N]} = vbias + vacc${ABC[N]} * vmultiplier${ABC[N]};
$for N in range(BATCH_TILE):
- int32_t vout${ABC[N]} = asr_s32(vacc${ABC[N]}, 8);
+ int32_t vout${ABC[N]} = math_asr_s32(vacc${ABC[N]}, 8);
$for N in range(BATCH_TILE):
vout${ABC[N]} = math_max_s32(vout${ABC[N]}, ${OUTPUT_MIN});
@@ -72,7 +72,7 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__scalar_select_x${BATCH_TILE}(
const int32_t vmultiplier = XNN_UNPREDICTABLE(vacc >= 0) ? vpositive_multiplier : vnegative_multiplier;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, ${OUTPUT_MIN});
vout = math_min_s32(vout, ${OUTPUT_MAX});
*y = (${XINT8_T}) vout;
@@ -82,7 +82,7 @@ void xnn_${DATATYPE.lower()}_vlrelu_ukernel__scalar_select_x${BATCH_TILE}(
const int32_t vmultiplier = XNN_UNPREDICTABLE(vacc >= 0) ? vpositive_multiplier : vnegative_multiplier;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, ${OUTPUT_MIN});
vout = math_min_s32(vout, ${OUTPUT_MAX});
*y++ = (${XINT8_T}) vout;
diff --git a/src/qu8-avgpool/9p8x-minmax-scalar-c1.c b/src/qu8-avgpool/9p8x-minmax-scalar-c1.c
index feeb859fc..dab8b0111 100644
--- a/src/qu8-avgpool/9p8x-minmax-scalar-c1.c
+++ b/src/qu8-avgpool/9p8x-minmax-scalar-c1.c
@@ -272,7 +272,7 @@ void xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1(
const int64_t vproduct = (int64_t) vacc * (int64_t) vmultiplier;
const int64_t vadjusted_product = vproduct - (int64_t) (vacc < 0);
- int32_t vout = (int32_t) asr_s64(vadjusted_product + vrounding, vshift);
+ int32_t vout = (int32_t) math_asr_s64(vadjusted_product + vrounding, vshift);
vout = vout < voutput_min ? voutput_min : vout;
vout = vout > voutput_max ? voutput_max : vout;
vout += voutput_zero_point;
diff --git a/src/qu8-avgpool/9x-minmax-scalar-c1.c b/src/qu8-avgpool/9x-minmax-scalar-c1.c
index ca66f5cad..127e57226 100644
--- a/src/qu8-avgpool/9x-minmax-scalar-c1.c
+++ b/src/qu8-avgpool/9x-minmax-scalar-c1.c
@@ -133,7 +133,7 @@ void xnn_qu8_avgpool_minmax_ukernel_9x__scalar_c1(
const int64_t vproduct = (int64_t) vacc * (int64_t) vmultiplier;
const int64_t vadjusted_product = vproduct - (int64_t) (vacc < 0);
- int32_t vout = (int32_t) asr_s64(vadjusted_product + vrounding, vshift);
+ int32_t vout = (int32_t) math_asr_s64(vadjusted_product + vrounding, vshift);
vout = vout < voutput_min ? voutput_min : vout;
vout = vout > voutput_max ? voutput_max : vout;
vout += voutput_zero_point;
diff --git a/src/qu8-requantization/gemmlowp-scalar.c b/src/qu8-requantization/gemmlowp-scalar.c
index 8d6ea0967..ecbbe2647 100644
--- a/src/qu8-requantization/gemmlowp-scalar.c
+++ b/src/qu8-requantization/gemmlowp-scalar.c
@@ -100,10 +100,10 @@ void xnn_qu8_requantize_gemmlowp__scalar(
const int32_t z_remainder = (z_q31product & remainder_mask) - (int32_t) (z_q31product < 0);
const int32_t w_remainder = (w_q31product & remainder_mask) - (int32_t) (w_q31product < 0);
- const int32_t x_scaled = asr_s32(x_q31product, shift) + (int32_t) (x_remainder > threshold);
- const int32_t y_scaled = asr_s32(y_q31product, shift) + (int32_t) (y_remainder > threshold);
- const int32_t z_scaled = asr_s32(z_q31product, shift) + (int32_t) (z_remainder > threshold);
- const int32_t w_scaled = asr_s32(w_q31product, shift) + (int32_t) (w_remainder > threshold);
+ const int32_t x_scaled = math_asr_s32(x_q31product, shift) + (int32_t) (x_remainder > threshold);
+ const int32_t y_scaled = math_asr_s32(y_q31product, shift) + (int32_t) (y_remainder > threshold);
+ const int32_t z_scaled = math_asr_s32(z_q31product, shift) + (int32_t) (z_remainder > threshold);
+ const int32_t w_scaled = math_asr_s32(w_q31product, shift) + (int32_t) (w_remainder > threshold);
// Clamp scaled value with zero point between (qmin - zero point) and (qmax - zero point).
const int32_t x_clamped = math_min_s32(math_max_s32(x_scaled, smin), smax);
diff --git a/src/qu8-requantization/rndna-scalar-signed64.c b/src/qu8-requantization/rndna-scalar-signed64.c
index 904cf90df..e70c1f2eb 100644
--- a/src/qu8-requantization/rndna-scalar-signed64.c
+++ b/src/qu8-requantization/rndna-scalar-signed64.c
@@ -65,10 +65,10 @@ void xnn_qu8_requantize_rndna__scalar_signed64(
// Note that although rounding is precomputed, it is dependent on shift value, and on processors with 64-bit
// "right shift with rounding" instruction each line below can be represented by just one such instruction
// (e.g. VRSHL.S64 on ARM NEON, SRSHL in ARM64 Advanced SIMD).
- const int32_t x_scaled = (int32_t) asr_s64(x_adjusted_product + rounding, shift);
- const int32_t y_scaled = (int32_t) asr_s64(y_adjusted_product + rounding, shift);
- const int32_t z_scaled = (int32_t) asr_s64(z_adjusted_product + rounding, shift);
- const int32_t w_scaled = (int32_t) asr_s64(w_adjusted_product + rounding, shift);
+ const int32_t x_scaled = (int32_t) math_asr_s64(x_adjusted_product + rounding, shift);
+ const int32_t y_scaled = (int32_t) math_asr_s64(y_adjusted_product + rounding, shift);
+ const int32_t z_scaled = (int32_t) math_asr_s64(z_adjusted_product + rounding, shift);
+ const int32_t w_scaled = (int32_t) math_asr_s64(w_adjusted_product + rounding, shift);
// Clamp scaled value with zero point between (qmin - zero point) and (qmax - zero point).
const int32_t x_clamped = math_min_s32(math_max_s32(x_scaled, smin), smax);
diff --git a/src/qu8-vadd/gen/minmax-scalar-x1.c b/src/qu8-vadd/gen/minmax-scalar-x1.c
index 79e3ee526..3fb515adf 100644
--- a/src/qu8-vadd/gen/minmax-scalar-x1.c
+++ b/src/qu8-vadd/gen/minmax-scalar-x1.c
@@ -33,7 +33,7 @@ void xnn_qu8_vadd_minmax_ukernel__scalar_x1(
const int32_t vb = *input_b++;
const int32_t vacc = vbias + va * va_multiplier + vb * vb_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (uint8_t) (vout + voutput_zero_point);
diff --git a/src/qu8-vadd/gen/minmax-scalar-x2.c b/src/qu8-vadd/gen/minmax-scalar-x2.c
index 05203dcb7..f65227db2 100644
--- a/src/qu8-vadd/gen/minmax-scalar-x2.c
+++ b/src/qu8-vadd/gen/minmax-scalar-x2.c
@@ -42,8 +42,8 @@ void xnn_qu8_vadd_minmax_ukernel__scalar_x2(
vacc0 += vb0 * vb_multiplier;
vacc1 += vb1 * vb_multiplier;
- int32_t vout0 = asr_s32(vacc0, vshift);
- int32_t vout1 = asr_s32(vacc1, vshift);
+ int32_t vout0 = math_asr_s32(vacc0, vshift);
+ int32_t vout1 = math_asr_s32(vacc1, vshift);
vout0 = math_max_s32(vout0, voutput_min_less_zero_point);
vout1 = math_max_s32(vout1, voutput_min_less_zero_point);
@@ -63,7 +63,7 @@ void xnn_qu8_vadd_minmax_ukernel__scalar_x2(
const int32_t vb = *input_b;
const int32_t vacc = vbias + va * va_multiplier + vb * vb_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (uint8_t) (vout + voutput_zero_point);
diff --git a/src/qu8-vadd/gen/minmax-scalar-x4.c b/src/qu8-vadd/gen/minmax-scalar-x4.c
index f433225f0..95e4148c2 100644
--- a/src/qu8-vadd/gen/minmax-scalar-x4.c
+++ b/src/qu8-vadd/gen/minmax-scalar-x4.c
@@ -50,10 +50,10 @@ void xnn_qu8_vadd_minmax_ukernel__scalar_x4(
vacc2 += vb2 * vb_multiplier;
vacc3 += vb3 * vb_multiplier;
- int32_t vout0 = asr_s32(vacc0, vshift);
- int32_t vout1 = asr_s32(vacc1, vshift);
- int32_t vout2 = asr_s32(vacc2, vshift);
- int32_t vout3 = asr_s32(vacc3, vshift);
+ int32_t vout0 = math_asr_s32(vacc0, vshift);
+ int32_t vout1 = math_asr_s32(vacc1, vshift);
+ int32_t vout2 = math_asr_s32(vacc2, vshift);
+ int32_t vout3 = math_asr_s32(vacc3, vshift);
vout0 = math_max_s32(vout0, voutput_min_less_zero_point);
vout1 = math_max_s32(vout1, voutput_min_less_zero_point);
@@ -82,7 +82,7 @@ void xnn_qu8_vadd_minmax_ukernel__scalar_x4(
const int32_t vb = *input_b++;
const int32_t vacc = vbias + va * va_multiplier + vb * vb_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (uint8_t) (vout + voutput_zero_point);
diff --git a/src/qu8-vaddc/gen/minmax-scalar-x1.c b/src/qu8-vaddc/gen/minmax-scalar-x1.c
index 096b2fc7f..22502c59c 100644
--- a/src/qu8-vaddc/gen/minmax-scalar-x1.c
+++ b/src/qu8-vaddc/gen/minmax-scalar-x1.c
@@ -31,7 +31,7 @@ void xnn_qu8_vaddc_minmax_ukernel__scalar_x1(
const int32_t va = *input_a++;
const int32_t vacc = vbias + va * va_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (uint8_t) (vout + voutput_zero_point);
diff --git a/src/qu8-vaddc/gen/minmax-scalar-x2.c b/src/qu8-vaddc/gen/minmax-scalar-x2.c
index ff775bf14..015532884 100644
--- a/src/qu8-vaddc/gen/minmax-scalar-x2.c
+++ b/src/qu8-vaddc/gen/minmax-scalar-x2.c
@@ -36,8 +36,8 @@ void xnn_qu8_vaddc_minmax_ukernel__scalar_x2(
const int32_t vacc1 = vbias + va1 * va_multiplier;
input_b += 2;
- int32_t vout0 = asr_s32(vacc0, vshift);
- int32_t vout1 = asr_s32(vacc1, vshift);
+ int32_t vout0 = math_asr_s32(vacc0, vshift);
+ int32_t vout1 = math_asr_s32(vacc1, vshift);
vout0 = math_max_s32(vout0, voutput_min_less_zero_point);
vout1 = math_max_s32(vout1, voutput_min_less_zero_point);
@@ -56,7 +56,7 @@ void xnn_qu8_vaddc_minmax_ukernel__scalar_x2(
const int32_t va = *input_a;
const int32_t vacc = vbias + va * va_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (uint8_t) (vout + voutput_zero_point);
diff --git a/src/qu8-vaddc/gen/minmax-scalar-x4.c b/src/qu8-vaddc/gen/minmax-scalar-x4.c
index d77b74f0c..fc6fce367 100644
--- a/src/qu8-vaddc/gen/minmax-scalar-x4.c
+++ b/src/qu8-vaddc/gen/minmax-scalar-x4.c
@@ -40,10 +40,10 @@ void xnn_qu8_vaddc_minmax_ukernel__scalar_x4(
const int32_t vacc3 = vbias + va3 * va_multiplier;
input_b += 4;
- int32_t vout0 = asr_s32(vacc0, vshift);
- int32_t vout1 = asr_s32(vacc1, vshift);
- int32_t vout2 = asr_s32(vacc2, vshift);
- int32_t vout3 = asr_s32(vacc3, vshift);
+ int32_t vout0 = math_asr_s32(vacc0, vshift);
+ int32_t vout1 = math_asr_s32(vacc1, vshift);
+ int32_t vout2 = math_asr_s32(vacc2, vshift);
+ int32_t vout3 = math_asr_s32(vacc3, vshift);
vout0 = math_max_s32(vout0, voutput_min_less_zero_point);
vout1 = math_max_s32(vout1, voutput_min_less_zero_point);
@@ -71,7 +71,7 @@ void xnn_qu8_vaddc_minmax_ukernel__scalar_x4(
const int32_t va = *input_a++;
const int32_t vacc = vbias + va * va_multiplier;
- int32_t vout = asr_s32(vacc, vshift);
+ int32_t vout = math_asr_s32(vacc, vshift);
vout = math_max_s32(vout, voutput_min_less_zero_point);
vout = math_min_s32(vout, voutput_max_less_zero_point);
*output++ = (uint8_t) (vout + voutput_zero_point);
diff --git a/src/qu8-vcvt/gen/vcvt-armv6simd-x4.c b/src/qu8-vcvt/gen/vcvt-armv6simd-x4.c
index cafbfd2e6..d78c12a62 100644
--- a/src/qu8-vcvt/gen/vcvt-armv6simd-x4.c
+++ b/src/qu8-vcvt/gen/vcvt-armv6simd-x4.c
@@ -38,10 +38,10 @@ void xnn_qu8_vcvt_ukernel__armv6simd_x4(
int32_t vacc2 = __smlawt(vmultiplier, vx02, vbias);
int32_t vacc3 = __smlawt(vmultiplier, vx13, vbias);
- vacc0 = __usat(asr_s32(vacc0, 1), 8);
- vacc1 = __usat(asr_s32(vacc1, 1), 8);
- vacc2 = __usat(asr_s32(vacc2, 1), 8);
- vacc3 = __usat(asr_s32(vacc3, 1), 8);
+ vacc0 = __usat(math_asr_s32(vacc0, 1), 8);
+ vacc1 = __usat(math_asr_s32(vacc1, 1), 8);
+ vacc2 = __usat(math_asr_s32(vacc2, 1), 8);
+ vacc3 = __usat(math_asr_s32(vacc3, 1), 8);
y[0] = (uint8_t) vacc0;
y[1] = (uint8_t) vacc1;
@@ -59,13 +59,13 @@ void xnn_qu8_vcvt_ukernel__armv6simd_x4(
int32_t vacc1 = __smlawb(vmultiplier, vx13, vbias);
const int32_t vacc2 = __smlawt(vmultiplier, vx02, vbias);
- vacc0 = __usat(asr_s32(vacc0, 1), 8);
- vacc1 = __usat(asr_s32(vacc1, 1), 8);
+ vacc0 = __usat(math_asr_s32(vacc0, 1), 8);
+ vacc1 = __usat(math_asr_s32(vacc1, 1), 8);
if (n & (2 * sizeof(uint8_t))) {
y[0] = (uint8_t) vacc0;
y[1] = (uint8_t) vacc1;
- vacc0 = __usat(asr_s32(vacc2, 1), 8);
+ vacc0 = __usat(math_asr_s32(vacc2, 1), 8);
y += 2;
}
if (n & (1 * sizeof(uint8_t))) {
diff --git a/src/qu8-vcvt/gen/vcvt-armv6simd-x8.c b/src/qu8-vcvt/gen/vcvt-armv6simd-x8.c
index cb24c6167..bd57725f4 100644
--- a/src/qu8-vcvt/gen/vcvt-armv6simd-x8.c
+++ b/src/qu8-vcvt/gen/vcvt-armv6simd-x8.c
@@ -45,14 +45,14 @@ void xnn_qu8_vcvt_ukernel__armv6simd_x8(
int32_t vacc6 = __smlawt(vmultiplier, vx46, vbias);
int32_t vacc7 = __smlawt(vmultiplier, vx57, vbias);
- vacc0 = __usat(asr_s32(vacc0, 1), 8);
- vacc1 = __usat(asr_s32(vacc1, 1), 8);
- vacc2 = __usat(asr_s32(vacc2, 1), 8);
- vacc3 = __usat(asr_s32(vacc3, 1), 8);
- vacc4 = __usat(asr_s32(vacc4, 1), 8);
- vacc5 = __usat(asr_s32(vacc5, 1), 8);
- vacc6 = __usat(asr_s32(vacc6, 1), 8);
- vacc7 = __usat(asr_s32(vacc7, 1), 8);
+ vacc0 = __usat(math_asr_s32(vacc0, 1), 8);
+ vacc1 = __usat(math_asr_s32(vacc1, 1), 8);
+ vacc2 = __usat(math_asr_s32(vacc2, 1), 8);
+ vacc3 = __usat(math_asr_s32(vacc3, 1), 8);
+ vacc4 = __usat(math_asr_s32(vacc4, 1), 8);
+ vacc5 = __usat(math_asr_s32(vacc5, 1), 8);
+ vacc6 = __usat(math_asr_s32(vacc6, 1), 8);
+ vacc7 = __usat(math_asr_s32(vacc7, 1), 8);
y[0] = (uint8_t) vacc0;
y[1] = (uint8_t) vacc1;
@@ -76,10 +76,10 @@ void xnn_qu8_vcvt_ukernel__armv6simd_x8(
int32_t vacc2 = __smlawt(vmultiplier, vx02, vbias);
int32_t vacc3 = __smlawt(vmultiplier, vx13, vbias);
- vacc0 = __usat(asr_s32(vacc0, 1), 8);
- vacc1 = __usat(asr_s32(vacc1, 1), 8);
- vacc2 = __usat(asr_s32(vacc2, 1), 8);
- vacc3 = __usat(asr_s32(vacc3, 1), 8);
+ vacc0 = __usat(math_asr_s32(vacc0, 1), 8);
+ vacc1 = __usat(math_asr_s32(vacc1, 1), 8);
+ vacc2 = __usat(math_asr_s32(vacc2, 1), 8);
+ vacc3 = __usat(math_asr_s32(vacc3, 1), 8);
y[0] = (uint8_t) vacc0;
y[1] = (uint8_t) vacc1;
@@ -97,13 +97,13 @@ void xnn_qu8_vcvt_ukernel__armv6simd_x8(
int32_t vacc1 = __smlawb(vmultiplier, vx13, vbias);
const int32_t vacc2 = __smlawt(vmultiplier, vx02, vbias);
- vacc0 = __usat(asr_s32(vacc0, 1), 8);
- vacc1 = __usat(asr_s32(vacc1, 1), 8);
+ vacc0 = __usat(math_asr_s32(vacc0, 1), 8);
+ vacc1 = __usat(math_asr_s32(vacc1, 1), 8);
if (n & (2 * sizeof(uint8_t))) {
y[0] = (uint8_t) vacc0;
y[1] = (uint8_t) vacc1;
- vacc0 = __usat(asr_s32(vacc2, 1), 8);
+ vacc0 = __usat(math_asr_s32(vacc2, 1), 8);
y += 2;
}
if (n & (1 * sizeof(uint8_t))) {
diff --git a/src/qu8-vcvt/gen/vcvt-scalar-x1.c b/src/qu8-vcvt/gen/vcvt-scalar-x1.c
index b4fc8fa63..9c99ce346 100644
--- a/src/qu8-vcvt/gen/vcvt-scalar-x1.c
+++ b/src/qu8-vcvt/gen/vcvt-scalar-x1.c
@@ -25,7 +25,7 @@ void xnn_qu8_vcvt_ukernel__scalar_x1(
int32_t vacc = *x++;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, 0);
vout = math_min_s32(vout, 255);
*y++ = (uint8_t) vout;
diff --git a/src/qu8-vcvt/gen/vcvt-scalar-x2.c b/src/qu8-vcvt/gen/vcvt-scalar-x2.c
index 1ceacd9b2..f5399fbf1 100644
--- a/src/qu8-vcvt/gen/vcvt-scalar-x2.c
+++ b/src/qu8-vcvt/gen/vcvt-scalar-x2.c
@@ -29,8 +29,8 @@ void xnn_qu8_vcvt_ukernel__scalar_x2(
vacc0 = vbias + vacc0 * vmultiplier;
vacc1 = vbias + vacc1 * vmultiplier;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
vout0 = math_max_s32(vout0, 0);
vout1 = math_max_s32(vout1, 0);
@@ -46,7 +46,7 @@ void xnn_qu8_vcvt_ukernel__scalar_x2(
int32_t vacc = *x;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, 0);
vout = math_min_s32(vout, 255);
*y = (uint8_t) vout;
diff --git a/src/qu8-vcvt/gen/vcvt-scalar-x4.c b/src/qu8-vcvt/gen/vcvt-scalar-x4.c
index f1568973b..d24df8932 100644
--- a/src/qu8-vcvt/gen/vcvt-scalar-x4.c
+++ b/src/qu8-vcvt/gen/vcvt-scalar-x4.c
@@ -33,10 +33,10 @@ void xnn_qu8_vcvt_ukernel__scalar_x4(
vacc2 = vbias + vacc2 * vmultiplier;
vacc3 = vbias + vacc3 * vmultiplier;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
- int32_t vout2 = asr_s32(vacc2, 8);
- int32_t vout3 = asr_s32(vacc3, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
+ int32_t vout2 = math_asr_s32(vacc2, 8);
+ int32_t vout3 = math_asr_s32(vacc3, 8);
vout0 = math_max_s32(vout0, 0);
vout1 = math_max_s32(vout1, 0);
@@ -59,7 +59,7 @@ void xnn_qu8_vcvt_ukernel__scalar_x4(
int32_t vacc = *x++;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, 0);
vout = math_min_s32(vout, 255);
*y++ = (uint8_t) vout;
diff --git a/src/qu8-vlrelu/gen/vlrelu-armv6simd-x4.c b/src/qu8-vlrelu/gen/vlrelu-armv6simd-x4.c
index 7cc3c3067..6f0487e7f 100644
--- a/src/qu8-vlrelu/gen/vlrelu-armv6simd-x4.c
+++ b/src/qu8-vlrelu/gen/vlrelu-armv6simd-x4.c
@@ -44,10 +44,10 @@ void xnn_qu8_vlrelu_ukernel__armv6simd_x4(
int32_t vacc2 = __smlatt(vmultiplier02, vx02, vbias);
int32_t vacc3 = __smlatt(vmultiplier13, vx13, vbias);
- vacc0 = __usat(asr_s32(vacc0, 8), 8);
- vacc1 = __usat(asr_s32(vacc1, 8), 8);
- vacc2 = __usat(asr_s32(vacc2, 8), 8);
- vacc3 = __usat(asr_s32(vacc3, 8), 8);
+ vacc0 = __usat(math_asr_s32(vacc0, 8), 8);
+ vacc1 = __usat(math_asr_s32(vacc1, 8), 8);
+ vacc2 = __usat(math_asr_s32(vacc2, 8), 8);
+ vacc3 = __usat(math_asr_s32(vacc3, 8), 8);
y[0] = (uint8_t) vacc0;
y[1] = (uint8_t) vacc1;
@@ -70,13 +70,13 @@ void xnn_qu8_vlrelu_ukernel__armv6simd_x4(
int32_t vacc1 = __smlabb(vmultiplier13, vx13, vbias);
const int32_t vacc2 = __smlatt(vmultiplier02, vx02, vbias);
- vacc0 = __usat(asr_s32(vacc0, 8), 8);
- vacc1 = __usat(asr_s32(vacc1, 8), 8);
+ vacc0 = __usat(math_asr_s32(vacc0, 8), 8);
+ vacc1 = __usat(math_asr_s32(vacc1, 8), 8);
if (n & (2 * sizeof(uint8_t))) {
y[0] = (uint8_t) vacc0;
y[1] = (uint8_t) vacc1;
- vacc0 = __usat(asr_s32(vacc2, 8), 8);
+ vacc0 = __usat(math_asr_s32(vacc2, 8), 8);
y += 2;
}
if (n & (1 * sizeof(uint8_t))) {
diff --git a/src/qu8-vlrelu/gen/vlrelu-armv6simd-x8.c b/src/qu8-vlrelu/gen/vlrelu-armv6simd-x8.c
index 565bb1e5f..8f3142080 100644
--- a/src/qu8-vlrelu/gen/vlrelu-armv6simd-x8.c
+++ b/src/qu8-vlrelu/gen/vlrelu-armv6simd-x8.c
@@ -55,14 +55,14 @@ void xnn_qu8_vlrelu_ukernel__armv6simd_x8(
int32_t vacc6 = __smlatt(vmultiplier46, vx46, vbias);
int32_t vacc7 = __smlatt(vmultiplier57, vx57, vbias);
- vacc0 = __usat(asr_s32(vacc0, 8), 8);
- vacc1 = __usat(asr_s32(vacc1, 8), 8);
- vacc2 = __usat(asr_s32(vacc2, 8), 8);
- vacc3 = __usat(asr_s32(vacc3, 8), 8);
- vacc4 = __usat(asr_s32(vacc4, 8), 8);
- vacc5 = __usat(asr_s32(vacc5, 8), 8);
- vacc6 = __usat(asr_s32(vacc6, 8), 8);
- vacc7 = __usat(asr_s32(vacc7, 8), 8);
+ vacc0 = __usat(math_asr_s32(vacc0, 8), 8);
+ vacc1 = __usat(math_asr_s32(vacc1, 8), 8);
+ vacc2 = __usat(math_asr_s32(vacc2, 8), 8);
+ vacc3 = __usat(math_asr_s32(vacc3, 8), 8);
+ vacc4 = __usat(math_asr_s32(vacc4, 8), 8);
+ vacc5 = __usat(math_asr_s32(vacc5, 8), 8);
+ vacc6 = __usat(math_asr_s32(vacc6, 8), 8);
+ vacc7 = __usat(math_asr_s32(vacc7, 8), 8);
y[0] = (uint8_t) vacc0;
y[1] = (uint8_t) vacc1;
@@ -91,10 +91,10 @@ void xnn_qu8_vlrelu_ukernel__armv6simd_x8(
int32_t vacc2 = __smlatt(vmultiplier02, vx02, vbias);
int32_t vacc3 = __smlatt(vmultiplier13, vx13, vbias);
- vacc0 = __usat(asr_s32(vacc0, 8), 8);
- vacc1 = __usat(asr_s32(vacc1, 8), 8);
- vacc2 = __usat(asr_s32(vacc2, 8), 8);
- vacc3 = __usat(asr_s32(vacc3, 8), 8);
+ vacc0 = __usat(math_asr_s32(vacc0, 8), 8);
+ vacc1 = __usat(math_asr_s32(vacc1, 8), 8);
+ vacc2 = __usat(math_asr_s32(vacc2, 8), 8);
+ vacc3 = __usat(math_asr_s32(vacc3, 8), 8);
y[0] = (uint8_t) vacc0;
y[1] = (uint8_t) vacc1;
@@ -117,13 +117,13 @@ void xnn_qu8_vlrelu_ukernel__armv6simd_x8(
int32_t vacc1 = __smlabb(vmultiplier13, vx13, vbias);
const int32_t vacc2 = __smlatt(vmultiplier02, vx02, vbias);
- vacc0 = __usat(asr_s32(vacc0, 8), 8);
- vacc1 = __usat(asr_s32(vacc1, 8), 8);
+ vacc0 = __usat(math_asr_s32(vacc0, 8), 8);
+ vacc1 = __usat(math_asr_s32(vacc1, 8), 8);
if (n & (2 * sizeof(uint8_t))) {
y[0] = (uint8_t) vacc0;
y[1] = (uint8_t) vacc1;
- vacc0 = __usat(asr_s32(vacc2, 8), 8);
+ vacc0 = __usat(math_asr_s32(vacc2, 8), 8);
y += 2;
}
if (n & (1 * sizeof(uint8_t))) {
diff --git a/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x1.c b/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x1.c
index 33e9176d7..c0ce8a70b 100644
--- a/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x1.c
+++ b/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x1.c
@@ -25,10 +25,10 @@ void xnn_qu8_vlrelu_ukernel__scalar_andxor_x1(
const int32_t vbias = params->scalar_andxor.bias;
do {
int32_t vacc = (int32_t) *x++ - vinput_zero_point;
- const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & asr_s32(vacc, 31));
+ const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & math_asr_s32(vacc, 31));
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, 0);
vout = math_min_s32(vout, 255);
*y++ = (uint8_t) vout;
diff --git a/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x2.c b/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x2.c
index 8e4f64fab..81eb91fdf 100644
--- a/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x2.c
+++ b/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x2.c
@@ -31,8 +31,8 @@ void xnn_qu8_vlrelu_ukernel__scalar_andxor_x2(
vacc0 -= vinput_zero_point;
vacc1 -= vinput_zero_point;
- int32_t vmultiplier0 = asr_s32(vacc0, 31);
- int32_t vmultiplier1 = asr_s32(vacc1, 31);
+ int32_t vmultiplier0 = math_asr_s32(vacc0, 31);
+ int32_t vmultiplier1 = math_asr_s32(vacc1, 31);
vmultiplier0 &= vmultiplier_diff;
vmultiplier1 &= vmultiplier_diff;
@@ -43,8 +43,8 @@ void xnn_qu8_vlrelu_ukernel__scalar_andxor_x2(
vacc0 = vbias + vacc0 * vmultiplier0;
vacc1 = vbias + vacc1 * vmultiplier1;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
vout0 = math_max_s32(vout0, 0);
vout1 = math_max_s32(vout1, 0);
@@ -58,10 +58,10 @@ void xnn_qu8_vlrelu_ukernel__scalar_andxor_x2(
}
if XNN_UNLIKELY(n != 0) {
int32_t vacc = (int32_t) *x++ - vinput_zero_point;
- const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & asr_s32(vacc, 31));
+ const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & math_asr_s32(vacc, 31));
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, 0);
vout = math_min_s32(vout, 255);
*y = (uint8_t) vout;
diff --git a/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x4.c b/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x4.c
index 7b52bb64e..da80ee8bb 100644
--- a/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x4.c
+++ b/src/qu8-vlrelu/gen/vlrelu-scalar-andxor-x4.c
@@ -35,10 +35,10 @@ void xnn_qu8_vlrelu_ukernel__scalar_andxor_x4(
vacc2 -= vinput_zero_point;
vacc3 -= vinput_zero_point;
- int32_t vmultiplier0 = asr_s32(vacc0, 31);
- int32_t vmultiplier1 = asr_s32(vacc1, 31);
- int32_t vmultiplier2 = asr_s32(vacc2, 31);
- int32_t vmultiplier3 = asr_s32(vacc3, 31);
+ int32_t vmultiplier0 = math_asr_s32(vacc0, 31);
+ int32_t vmultiplier1 = math_asr_s32(vacc1, 31);
+ int32_t vmultiplier2 = math_asr_s32(vacc2, 31);
+ int32_t vmultiplier3 = math_asr_s32(vacc3, 31);
vmultiplier0 &= vmultiplier_diff;
vmultiplier1 &= vmultiplier_diff;
@@ -55,10 +55,10 @@ void xnn_qu8_vlrelu_ukernel__scalar_andxor_x4(
vacc2 = vbias + vacc2 * vmultiplier2;
vacc3 = vbias + vacc3 * vmultiplier3;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
- int32_t vout2 = asr_s32(vacc2, 8);
- int32_t vout3 = asr_s32(vacc3, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
+ int32_t vout2 = math_asr_s32(vacc2, 8);
+ int32_t vout3 = math_asr_s32(vacc3, 8);
vout0 = math_max_s32(vout0, 0);
vout1 = math_max_s32(vout1, 0);
@@ -79,10 +79,10 @@ void xnn_qu8_vlrelu_ukernel__scalar_andxor_x4(
if XNN_UNLIKELY(n != 0) {
do {
int32_t vacc = (int32_t) *x++ - vinput_zero_point;
- const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & asr_s32(vacc, 31));
+ const int32_t vmultiplier = vmultiplier_base ^ (vmultiplier_diff & math_asr_s32(vacc, 31));
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, 0);
vout = math_min_s32(vout, 255);
*y++ = (uint8_t) vout;
diff --git a/src/qu8-vlrelu/gen/vlrelu-scalar-select-x1.c b/src/qu8-vlrelu/gen/vlrelu-scalar-select-x1.c
index 8db62d4ba..479336be4 100644
--- a/src/qu8-vlrelu/gen/vlrelu-scalar-select-x1.c
+++ b/src/qu8-vlrelu/gen/vlrelu-scalar-select-x1.c
@@ -28,7 +28,7 @@ void xnn_qu8_vlrelu_ukernel__scalar_select_x1(
const int32_t vmultiplier = XNN_UNPREDICTABLE(vacc >= 0) ? vpositive_multiplier : vnegative_multiplier;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, 0);
vout = math_min_s32(vout, 255);
*y++ = (uint8_t) vout;
diff --git a/src/qu8-vlrelu/gen/vlrelu-scalar-select-x2.c b/src/qu8-vlrelu/gen/vlrelu-scalar-select-x2.c
index eb6164b26..2ba144e7f 100644
--- a/src/qu8-vlrelu/gen/vlrelu-scalar-select-x2.c
+++ b/src/qu8-vlrelu/gen/vlrelu-scalar-select-x2.c
@@ -37,8 +37,8 @@ void xnn_qu8_vlrelu_ukernel__scalar_select_x2(
vacc0 = vbias + vacc0 * vmultiplier0;
vacc1 = vbias + vacc1 * vmultiplier1;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
vout0 = math_max_s32(vout0, 0);
vout1 = math_max_s32(vout1, 0);
@@ -55,7 +55,7 @@ void xnn_qu8_vlrelu_ukernel__scalar_select_x2(
const int32_t vmultiplier = XNN_UNPREDICTABLE(vacc >= 0) ? vpositive_multiplier : vnegative_multiplier;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, 0);
vout = math_min_s32(vout, 255);
*y = (uint8_t) vout;
diff --git a/src/qu8-vlrelu/gen/vlrelu-scalar-select-x4.c b/src/qu8-vlrelu/gen/vlrelu-scalar-select-x4.c
index 6d5f36165..4300bd2c0 100644
--- a/src/qu8-vlrelu/gen/vlrelu-scalar-select-x4.c
+++ b/src/qu8-vlrelu/gen/vlrelu-scalar-select-x4.c
@@ -45,10 +45,10 @@ void xnn_qu8_vlrelu_ukernel__scalar_select_x4(
vacc2 = vbias + vacc2 * vmultiplier2;
vacc3 = vbias + vacc3 * vmultiplier3;
- int32_t vout0 = asr_s32(vacc0, 8);
- int32_t vout1 = asr_s32(vacc1, 8);
- int32_t vout2 = asr_s32(vacc2, 8);
- int32_t vout3 = asr_s32(vacc3, 8);
+ int32_t vout0 = math_asr_s32(vacc0, 8);
+ int32_t vout1 = math_asr_s32(vacc1, 8);
+ int32_t vout2 = math_asr_s32(vacc2, 8);
+ int32_t vout3 = math_asr_s32(vacc3, 8);
vout0 = math_max_s32(vout0, 0);
vout1 = math_max_s32(vout1, 0);
@@ -72,7 +72,7 @@ void xnn_qu8_vlrelu_ukernel__scalar_select_x4(
const int32_t vmultiplier = XNN_UNPREDICTABLE(vacc >= 0) ? vpositive_multiplier : vnegative_multiplier;
vacc = vbias + vacc * vmultiplier;
- int32_t vout = asr_s32(vacc, 8);
+ int32_t vout = math_asr_s32(vacc, 8);
vout = math_max_s32(vout, 0);
vout = math_min_s32(vout, 255);
*y++ = (uint8_t) vout;
diff --git a/src/s16-window/gen/scalar-x1.c b/src/s16-window/gen/scalar-x1.c
index fcf4323b7..623f08390 100644
--- a/src/s16-window/gen/scalar-x1.c
+++ b/src/s16-window/gen/scalar-x1.c
@@ -39,7 +39,7 @@ void xnn_s16_window_ukernel__scalar_x1(
int32_t vout = ((int32_t) input[0] * (int32_t) w[0]);
++input;
++w;
- vout = asr_s32(vout, shift);
+ vout = math_asr_s32(vout, shift);
vout = math_max_s32(vout, INT16_MIN);
vout = math_min_s32(vout, INT16_MAX);
output[0] = (int16_t)(vout);
diff --git a/src/s16-window/gen/scalar-x2.c b/src/s16-window/gen/scalar-x2.c
index d0dfd5928..39a5b48ad 100644
--- a/src/s16-window/gen/scalar-x2.c
+++ b/src/s16-window/gen/scalar-x2.c
@@ -45,8 +45,8 @@ void xnn_s16_window_ukernel__scalar_x2(
int32_t vout0 = (int32_t) vi0 * (int32_t) w0;
int32_t vout1 = (int32_t) vi1 * (int32_t) w1;
- vout0 = asr_s32(vout0, shift);
- vout1 = asr_s32(vout1, shift);
+ vout0 = math_asr_s32(vout0, shift);
+ vout1 = math_asr_s32(vout1, shift);
vout0 = math_max_s32(vout0, INT16_MIN);
vout1 = math_max_s32(vout1, INT16_MIN);
@@ -65,7 +65,7 @@ void xnn_s16_window_ukernel__scalar_x2(
int32_t vout = ((int32_t) input[0] * (int32_t) w[0]);
++input;
++w;
- vout = asr_s32(vout, shift);
+ vout = math_asr_s32(vout, shift);
vout = math_max_s32(vout, INT16_MIN);
vout = math_min_s32(vout, INT16_MAX);
output[0] = (int16_t)(vout);
diff --git a/src/s16-window/gen/scalar-x3.c b/src/s16-window/gen/scalar-x3.c
index ab5901e8c..6bd194224 100644
--- a/src/s16-window/gen/scalar-x3.c
+++ b/src/s16-window/gen/scalar-x3.c
@@ -48,9 +48,9 @@ void xnn_s16_window_ukernel__scalar_x3(
int32_t vout1 = (int32_t) vi1 * (int32_t) w1;
int32_t vout2 = (int32_t) vi2 * (int32_t) w2;
- vout0 = asr_s32(vout0, shift);
- vout1 = asr_s32(vout1, shift);
- vout2 = asr_s32(vout2, shift);
+ vout0 = math_asr_s32(vout0, shift);
+ vout1 = math_asr_s32(vout1, shift);
+ vout2 = math_asr_s32(vout2, shift);
vout0 = math_max_s32(vout0, INT16_MIN);
vout1 = math_max_s32(vout1, INT16_MIN);
@@ -72,7 +72,7 @@ void xnn_s16_window_ukernel__scalar_x3(
int32_t vout = ((int32_t) input[0] * (int32_t) w[0]);
++input;
++w;
- vout = asr_s32(vout, shift);
+ vout = math_asr_s32(vout, shift);
vout = math_max_s32(vout, INT16_MIN);
vout = math_min_s32(vout, INT16_MAX);
output[0] = (int16_t)(vout);
diff --git a/src/s16-window/gen/scalar-x4.c b/src/s16-window/gen/scalar-x4.c
index 100a30af6..081712590 100644
--- a/src/s16-window/gen/scalar-x4.c
+++ b/src/s16-window/gen/scalar-x4.c
@@ -51,10 +51,10 @@ void xnn_s16_window_ukernel__scalar_x4(
int32_t vout2 = (int32_t) vi2 * (int32_t) w2;
int32_t vout3 = (int32_t) vi3 * (int32_t) w3;
- vout0 = asr_s32(vout0, shift);
- vout1 = asr_s32(vout1, shift);
- vout2 = asr_s32(vout2, shift);
- vout3 = asr_s32(vout3, shift);
+ vout0 = math_asr_s32(vout0, shift);
+ vout1 = math_asr_s32(vout1, shift);
+ vout2 = math_asr_s32(vout2, shift);
+ vout3 = math_asr_s32(vout3, shift);
vout0 = math_max_s32(vout0, INT16_MIN);
vout1 = math_max_s32(vout1, INT16_MIN);
@@ -79,7 +79,7 @@ void xnn_s16_window_ukernel__scalar_x4(
int32_t vout = ((int32_t) input[0] * (int32_t) w[0]);
++input;
++w;
- vout = asr_s32(vout, shift);
+ vout = math_asr_s32(vout, shift);
vout = math_max_s32(vout, INT16_MIN);
vout = math_min_s32(vout, INT16_MAX);
output[0] = (int16_t)(vout);
diff --git a/src/s16-window/scalar.c.in b/src/s16-window/scalar.c.in
index d88835f5d..229cf9a8c 100644
--- a/src/s16-window/scalar.c.in
+++ b/src/s16-window/scalar.c.in
@@ -44,7 +44,7 @@ void xnn_s16_window_ukernel__scalar_x${CHANNEL_TILE}(
int32_t vout${C} = (int32_t) vi${C} * (int32_t) w${C};
$for C in range(CHANNEL_TILE):
- vout${C} = asr_s32(vout${C}, shift);
+ vout${C} = math_asr_s32(vout${C}, shift);
$for C in range(CHANNEL_TILE):
vout${C} = math_max_s32(vout${C}, INT16_MIN);
@@ -63,7 +63,7 @@ void xnn_s16_window_ukernel__scalar_x${CHANNEL_TILE}(
int32_t vout = ((int32_t) input[0] * (int32_t) w[0]);
++input;
++w;
- vout = asr_s32(vout, shift);
+ vout = math_asr_s32(vout, shift);
vout = math_max_s32(vout, INT16_MIN);
vout = math_min_s32(vout, INT16_MAX);
output[0] = (int16_t)(vout);
diff --git a/src/s8-ibilinear/gen/scalar-c1.c b/src/s8-ibilinear/gen/scalar-c1.c
index 4d3c19421..f2246e766 100644
--- a/src/s8-ibilinear/gen/scalar-c1.c
+++ b/src/s8-ibilinear/gen/scalar-c1.c
@@ -55,7 +55,7 @@ void xnn_s8_ibilinear_ukernel__scalar_c1(
const int32_t vacc = (int32_t) ((uint32_t) vt << 11) + vd * valphav;
- const int32_t vo = asr_s32(vacc + vrounding, 22);
+ const int32_t vo = math_asr_s32(vacc + vrounding, 22);
*output++ = vo;
diff --git a/src/s8-ibilinear/gen/scalar-c2.c b/src/s8-ibilinear/gen/scalar-c2.c
index d435fa0ac..b1918ddb0 100644
--- a/src/s8-ibilinear/gen/scalar-c2.c
+++ b/src/s8-ibilinear/gen/scalar-c2.c
@@ -69,8 +69,8 @@ void xnn_s8_ibilinear_ukernel__scalar_c2(
const int32_t vacc0 = (int32_t) ((uint32_t) vt0 << 11) + vd0 * valphav;
const int32_t vacc1 = (int32_t) ((uint32_t) vt1 << 11) + vd1 * valphav;
- const int32_t vo0 = asr_s32(vacc0 + vrounding, 22);
- const int32_t vo1 = asr_s32(vacc1 + vrounding, 22);
+ const int32_t vo0 = math_asr_s32(vacc0 + vrounding, 22);
+ const int32_t vo1 = math_asr_s32(vacc1 + vrounding, 22);
output[0] = (int8_t) vo0;
output[1] = (int8_t) vo1;
@@ -92,7 +92,7 @@ void xnn_s8_ibilinear_ukernel__scalar_c2(
const int32_t vacc = (int32_t) ((uint32_t) vt << 11) + vd * valphav;
- const int32_t vo = asr_s32(vacc + vrounding, 22);
+ const int32_t vo = math_asr_s32(vacc + vrounding, 22);
*output++ = vo;
}
diff --git a/src/s8-ibilinear/gen/scalar-c4.c b/src/s8-ibilinear/gen/scalar-c4.c
index 8ff553f03..b92c079df 100644
--- a/src/s8-ibilinear/gen/scalar-c4.c
+++ b/src/s8-ibilinear/gen/scalar-c4.c
@@ -89,10 +89,10 @@ void xnn_s8_ibilinear_ukernel__scalar_c4(
const int32_t vacc2 = (int32_t) ((uint32_t) vt2 << 11) + vd2 * valphav;
const int32_t vacc3 = (int32_t) ((uint32_t) vt3 << 11) + vd3 * valphav;
- const int32_t vo0 = asr_s32(vacc0 + vrounding, 22);
- const int32_t vo1 = asr_s32(vacc1 + vrounding, 22);
- const int32_t vo2 = asr_s32(vacc2 + vrounding, 22);
- const int32_t vo3 = asr_s32(vacc3 + vrounding, 22);
+ const int32_t vo0 = math_asr_s32(vacc0 + vrounding, 22);
+ const int32_t vo1 = math_asr_s32(vacc1 + vrounding, 22);
+ const int32_t vo2 = math_asr_s32(vacc2 + vrounding, 22);
+ const int32_t vo3 = math_asr_s32(vacc3 + vrounding, 22);
output[0] = (int8_t) vo0;
output[1] = (int8_t) vo1;
@@ -116,7 +116,7 @@ void xnn_s8_ibilinear_ukernel__scalar_c4(
const int32_t vacc = (int32_t) ((uint32_t) vt << 11) + vd * valphav;
- const int32_t vo = asr_s32(vacc + vrounding, 22);
+ const int32_t vo = math_asr_s32(vacc + vrounding, 22);
*output++ = vo;
}
diff --git a/src/s8-ibilinear/scalar.c.in b/src/s8-ibilinear/scalar.c.in
index 266d7fefb..b29d08a7d 100644
--- a/src/s8-ibilinear/scalar.c.in
+++ b/src/s8-ibilinear/scalar.c.in
@@ -66,7 +66,7 @@ void xnn_${DATATYPE.lower()}_ibilinear_ukernel__scalar_c${CHANNEL_TILE}${"" if P
const int32_t vacc${ABC[C]} = (int32_t) ((uint32_t) vt${ABC[C]} << 11) + vd${ABC[C]} * valphav;
$for C in range(CHANNEL_TILE):
- const int32_t vo${ABC[C]} = asr_s32(vacc${ABC[C]} + vrounding, 22);
+ const int32_t vo${ABC[C]} = math_asr_s32(vacc${ABC[C]} + vrounding, 22);
$for C in range(CHANNEL_TILE):
output[${C}] = (${XINT8_T}) vo${ABC[C]};
@@ -88,7 +88,7 @@ void xnn_${DATATYPE.lower()}_ibilinear_ukernel__scalar_c${CHANNEL_TILE}${"" if P
const int32_t vacc = (int32_t) ((uint32_t) vt << 11) + vd * valphav;
- const int32_t vo = asr_s32(vacc + vrounding, 22);
+ const int32_t vo = math_asr_s32(vacc + vrounding, 22);
*output++ = vo;
}
@@ -109,7 +109,7 @@ void xnn_${DATATYPE.lower()}_ibilinear_ukernel__scalar_c${CHANNEL_TILE}${"" if P
const int32_t vacc = (int32_t) ((uint32_t) vt << 11) + vd * valphav;
- const int32_t vo = asr_s32(vacc + vrounding, 22);
+ const int32_t vo = math_asr_s32(vacc + vrounding, 22);
*output++ = vo;
diff --git a/src/u8-ibilinear/gen/scalar-c1.c b/src/u8-ibilinear/gen/scalar-c1.c
index a17287fc6..de03b9a01 100644
--- a/src/u8-ibilinear/gen/scalar-c1.c
+++ b/src/u8-ibilinear/gen/scalar-c1.c
@@ -55,7 +55,7 @@ void xnn_u8_ibilinear_ukernel__scalar_c1(
const int32_t vacc = (int32_t) ((uint32_t) vt << 11) + vd * valphav;
- const int32_t vo = asr_s32(vacc + vrounding, 22);
+ const int32_t vo = math_asr_s32(vacc + vrounding, 22);
*output++ = vo;
diff --git a/src/u8-ibilinear/gen/scalar-c2.c b/src/u8-ibilinear/gen/scalar-c2.c
index 33b18d821..5f398122c 100644
--- a/src/u8-ibilinear/gen/scalar-c2.c
+++ b/src/u8-ibilinear/gen/scalar-c2.c
@@ -69,8 +69,8 @@ void xnn_u8_ibilinear_ukernel__scalar_c2(
const int32_t vacc0 = (int32_t) ((uint32_t) vt0 << 11) + vd0 * valphav;
const int32_t vacc1 = (int32_t) ((uint32_t) vt1 << 11) + vd1 * valphav;
- const int32_t vo0 = asr_s32(vacc0 + vrounding, 22);
- const int32_t vo1 = asr_s32(vacc1 + vrounding, 22);
+ const int32_t vo0 = math_asr_s32(vacc0 + vrounding, 22);
+ const int32_t vo1 = math_asr_s32(vacc1 + vrounding, 22);
output[0] = (uint8_t) vo0;
output[1] = (uint8_t) vo1;
@@ -92,7 +92,7 @@ void xnn_u8_ibilinear_ukernel__scalar_c2(
const int32_t vacc = (int32_t) ((uint32_t) vt << 11) + vd * valphav;
- const int32_t vo = asr_s32(vacc + vrounding, 22);
+ const int32_t vo = math_asr_s32(vacc + vrounding, 22);
*output++ = vo;
}
diff --git a/src/u8-ibilinear/gen/scalar-c4.c b/src/u8-ibilinear/gen/scalar-c4.c
index 318995666..438cfe3ce 100644
--- a/src/u8-ibilinear/gen/scalar-c4.c
+++ b/src/u8-ibilinear/gen/scalar-c4.c
@@ -89,10 +89,10 @@ void xnn_u8_ibilinear_ukernel__scalar_c4(
const int32_t vacc2 = (int32_t) ((uint32_t) vt2 << 11) + vd2 * valphav;
const int32_t vacc3 = (int32_t) ((uint32_t) vt3 << 11) + vd3 * valphav;
- const int32_t vo0 = asr_s32(vacc0 + vrounding, 22);
- const int32_t vo1 = asr_s32(vacc1 + vrounding, 22);
- const int32_t vo2 = asr_s32(vacc2 + vrounding, 22);
- const int32_t vo3 = asr_s32(vacc3 + vrounding, 22);
+ const int32_t vo0 = math_asr_s32(vacc0 + vrounding, 22);
+ const int32_t vo1 = math_asr_s32(vacc1 + vrounding, 22);
+ const int32_t vo2 = math_asr_s32(vacc2 + vrounding, 22);
+ const int32_t vo3 = math_asr_s32(vacc3 + vrounding, 22);
output[0] = (uint8_t) vo0;
output[1] = (uint8_t) vo1;
@@ -116,7 +116,7 @@ void xnn_u8_ibilinear_ukernel__scalar_c4(
const int32_t vacc = (int32_t) ((uint32_t) vt << 11) + vd * valphav;
- const int32_t vo = asr_s32(vacc + vrounding, 22);
+ const int32_t vo = math_asr_s32(vacc + vrounding, 22);
*output++ = vo;
}
diff --git a/src/xnnpack/math.h b/src/xnnpack/math.h
index 982af7638..deefacf6f 100644
--- a/src/xnnpack/math.h
+++ b/src/xnnpack/math.h
@@ -155,7 +155,7 @@ XNN_INLINE static float math_nonsign_mask_f32() {
#endif
XNN_IGNORE_SHIFT_BASE_UB
-XNN_INLINE static int32_t asr_s32(int32_t x, uint32_t n) {
+XNN_INLINE static int32_t math_asr_s32(int32_t x, uint32_t n) {
#ifdef XNN_USE_SHIFT_BASE_UB_WORKAROUND
#if XNN_ARCH_X86_64 || XNN_ARCH_ARM64
return (int32_t) ((uint64_t) (int64_t) x >> n);
@@ -168,7 +168,7 @@ XNN_INLINE static int32_t asr_s32(int32_t x, uint32_t n) {
}
XNN_IGNORE_SHIFT_BASE_UB
-XNN_INLINE static int64_t asr_s64(int64_t x, uint32_t n) {
+XNN_INLINE static int64_t math_asr_s64(int64_t x, uint32_t n) {
#ifdef XNN_USE_SHIFT_BASE_UB_WORKAROUND
return x >= 0 ? x >> n : ~(~x >> n);
#else
diff --git a/src/xnnpack/requantization.h b/src/xnnpack/requantization.h
index 18ddb9c44..a556acd33 100644
--- a/src/xnnpack/requantization.h
+++ b/src/xnnpack/requantization.h
@@ -173,7 +173,7 @@ static inline int8_t xnn_qs8_requantize_rndnu(
const int32_t max_less_zero_point = (int32_t) max - (int32_t) zero_point;
const int64_t abs_prescaled_input = (int64_t) input * (int64_t) multiplier;
- int32_t output = (int32_t) asr_s64(abs_prescaled_input + rounding, shift);
+ int32_t output = (int32_t) math_asr_s64(abs_prescaled_input + rounding, shift);
output = math_max_s32(output, min_less_zero_point);
output = math_min_s32(output, max_less_zero_point);
return (int8_t) (output + (int32_t) zero_point);
@@ -200,7 +200,7 @@ static inline uint8_t xnn_qu8_requantize_rndnu(
const int32_t max_less_zero_point = (int32_t) max - (int32_t) zero_point;
const int64_t abs_prescaled_input = (int64_t) input * (int64_t) multiplier;
- int32_t output = (int32_t) asr_s64(abs_prescaled_input + rounding, shift);
+ int32_t output = (int32_t) math_asr_s64(abs_prescaled_input + rounding, shift);
output = math_max_s32(output, min_less_zero_point);
output = math_min_s32(output, max_less_zero_point);
return (uint8_t) (output + (int32_t) zero_point);
@@ -214,7 +214,7 @@ static inline uint8_t xnn_qu8_quantize_add(
int32_t acc = params.scalar.bias + (int32_t) (uint32_t) a * params.scalar.a_multiplier + (int32_t) (uint32_t) b * params.scalar.b_multiplier;
// Shift right with rounding away from zero.
- acc = asr_s32(acc, params.scalar.shift);
+ acc = math_asr_s32(acc, params.scalar.shift);
// Clamp and add output zero point.
acc = math_max_s32(acc, params.scalar.output_min_less_zero_point);
@@ -230,7 +230,7 @@ static inline int8_t xnn_qs8_quantize_add(
int32_t acc = params.scalar.bias + (int32_t) a * params.scalar.a_multiplier + (int32_t) b * params.scalar.b_multiplier;
// Shift right with rounding away from zero.
- acc = asr_s32(acc, params.scalar.shift);
+ acc = math_asr_s32(acc, params.scalar.shift);
// Clamp and add output zero point.
acc = math_max_s32(acc, params.scalar.output_min_less_zero_point);
diff --git a/test/ibilinear-microkernel-tester.h b/test/ibilinear-microkernel-tester.h
index a11a7d267..34cc4acbd 100644
--- a/test/ibilinear-microkernel-tester.h
+++ b/test/ibilinear-microkernel-tester.h
@@ -229,7 +229,7 @@ class IBilinearMicrokernelTester {
for (size_t c = 0; c < channels(); c++) {
const int32_t alpha_h = packed_weights[i * 2 + 0];
const int32_t alpha_v = packed_weights[i * 2 + 1];
- const int32_t acc = asr_s32(
+ const int32_t acc = math_asr_s32(
int32_t(indirection[i * 4 + 0][c + input_offset()]) * (2048 - alpha_h) * (2048 - alpha_v) +
int32_t(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (2048 - alpha_v) +
int32_t(indirection[i * 4 + 2][c + input_offset()]) * (2048 - alpha_h) * alpha_v +
diff --git a/test/vcvt-microkernel-tester.h b/test/vcvt-microkernel-tester.h
index 1609cbb21..0961fbda4 100644
--- a/test/vcvt-microkernel-tester.h
+++ b/test/vcvt-microkernel-tester.h
@@ -266,7 +266,7 @@ class VCvtMicrokernelTester {
const int32_t multiplier = (int32_t) lrintf(-256.0f * scale());
for (size_t i = 0; i < batch_size(); i++) {
const int32_t input_value = (input_zero_point() - input[i]) << 7;
- int32_t output_value = asr_s32(input_value * multiplier + INT32_C(0x4000), 15) + output_zero_point();
+ int32_t output_value = math_asr_s32(input_value * multiplier + INT32_C(0x4000), 15) + output_zero_point();
output_value = std::min<int32_t>(output_value, std::numeric_limits<int8_t>::max());
output_value = std::max<int32_t>(output_value, std::numeric_limits<int8_t>::min());
output_ref[i] = static_cast<int8_t>(output_value);
@@ -345,7 +345,7 @@ class VCvtMicrokernelTester {
const int32_t multiplier = (int32_t) lrintf(-256.0f * scale());
for (size_t i = 0; i < batch_size(); i++) {
const int32_t input_value = (input_zero_point() - input[i]) << 7;
- int32_t output_value = asr_s32(input_value * multiplier + INT32_C(0x4000), 15) + output_zero_point();
+ int32_t output_value = math_asr_s32(input_value * multiplier + INT32_C(0x4000), 15) + output_zero_point();
output_value = std::min<int32_t>(output_value, std::numeric_limits<uint8_t>::max());
output_value = std::max<int32_t>(output_value, std::numeric_limits<uint8_t>::min());
output_ref[i] = static_cast<uint8_t>(output_value);
diff --git a/test/vlrelu-microkernel-tester.h b/test/vlrelu-microkernel-tester.h
index d73fdbf9b..ec9ed66ba 100644
--- a/test/vlrelu-microkernel-tester.h
+++ b/test/vlrelu-microkernel-tester.h
@@ -113,7 +113,7 @@ class VLReLUMicrokernelTester {
for (size_t i = 0; i < batch_size(); i++) {
const int32_t input_value = (input_zero_point() - input[i]) << 7;
const int32_t multiplier = input_value <= 0 ? positive_multiplier : negative_multiplier;
- int32_t output_value = asr_s32(input_value * multiplier + INT32_C(0x4000), 15) + output_zero_point();
+ int32_t output_value = math_asr_s32(input_value * multiplier + INT32_C(0x4000), 15) + output_zero_point();
output_value = std::min<int32_t>(output_value, std::numeric_limits<int8_t>::max());
output_value = std::max<int32_t>(output_value, std::numeric_limits<int8_t>::min());
output_ref[i] = static_cast<int8_t>(output_value);
@@ -158,7 +158,7 @@ class VLReLUMicrokernelTester {
for (size_t i = 0; i < batch_size(); i++) {
const int32_t input_value = (input_zero_point() - input[i]) << 7;
const int32_t multiplier = input_value <= 0 ? positive_multiplier : negative_multiplier;
- int32_t output_value = asr_s32(input_value * multiplier + INT32_C(0x4000), 15) + output_zero_point();
+ int32_t output_value = math_asr_s32(input_value * multiplier + INT32_C(0x4000), 15) + output_zero_point();
output_value = std::min<int32_t>(output_value, std::numeric_limits<uint8_t>::max());
output_value = std::max<int32_t>(output_value, std::numeric_limits<uint8_t>::min());
output_ref[i] = static_cast<uint8_t>(output_value);