 bench/s16-vlshift.cc              |  2 +-
 src/s16-vlshift/gen/neon-x16.c    | 10 +++++-----
 src/s16-vlshift/gen/neon-x24.c    | 10 +++++-----
 src/s16-vlshift/gen/neon-x32.c    | 10 +++++-----
 src/s16-vlshift/gen/neon-x8.c     | 10 +++++-----
 src/s16-vlshift/gen/scalar-x1.c   |  9 ++++-----
 src/s16-vlshift/gen/scalar-x2.c   |  9 ++++-----
 src/s16-vlshift/gen/scalar-x3.c   |  9 ++++-----
 src/s16-vlshift/gen/scalar-x4.c   |  9 ++++-----
 src/s16-vlshift/neon.c.in         | 10 +++++-----
 src/s16-vlshift/scalar.c.in       |  9 ++++-----
 src/xnnpack/microfnptr.h          |  6 +++---
 src/xnnpack/vlshift.h             |  6 +++---
 test/vlshift-microkernel-tester.h |  2 +-
 14 files changed, 53 insertions(+), 58 deletions(-)
diff --git a/bench/s16-vlshift.cc b/bench/s16-vlshift.cc
index 3d3f23f37..749b2ee30 100644
--- a/bench/s16-vlshift.cc
+++ b/bench/s16-vlshift.cc
@@ -36,7 +36,7 @@ void vlshift(
std::iota(output.begin(), output.end(), 1);
for (auto _ : state) {
- vlshift(batch, input.data(), uint32_t(4), output.data());
+ vlshift(batch, input.data(), output.data(), 4 /* shift */);
}
const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
diff --git a/src/s16-vlshift/gen/neon-x16.c b/src/s16-vlshift/gen/neon-x16.c
index a2b3d9c83..2e2ca6a16 100644
--- a/src/s16-vlshift/gen/neon-x16.c
+++ b/src/s16-vlshift/gen/neon-x16.c
@@ -20,15 +20,15 @@
void xnn_s16_vlshift_ukernel__neon_x16(
size_t batch,
const int16_t* input,
- uint32_t shift,
- int16_t* output) {
-
+ int16_t* output,
+ uint32_t shift)
+{
assert(batch > 0);
assert(input != NULL);
- assert(shift < 16);
assert(output != NULL);
+ assert(shift < 16);
- const int16x8_t vshift = vdupq_n_s16(shift);
+ const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
for (; batch >= 16; batch -= 16) {
const int16x8_t vi0 = vld1q_s16(input); input += 8;
diff --git a/src/s16-vlshift/gen/neon-x24.c b/src/s16-vlshift/gen/neon-x24.c
index 91f8cb690..1e66c8f61 100644
--- a/src/s16-vlshift/gen/neon-x24.c
+++ b/src/s16-vlshift/gen/neon-x24.c
@@ -20,15 +20,15 @@
void xnn_s16_vlshift_ukernel__neon_x24(
size_t batch,
const int16_t* input,
- uint32_t shift,
- int16_t* output) {
-
+ int16_t* output,
+ uint32_t shift)
+{
assert(batch > 0);
assert(input != NULL);
- assert(shift < 16);
assert(output != NULL);
+ assert(shift < 16);
- const int16x8_t vshift = vdupq_n_s16(shift);
+ const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
for (; batch >= 24; batch -= 24) {
const int16x8_t vi0 = vld1q_s16(input); input += 8;
diff --git a/src/s16-vlshift/gen/neon-x32.c b/src/s16-vlshift/gen/neon-x32.c
index 2c50a9067..0cd66636a 100644
--- a/src/s16-vlshift/gen/neon-x32.c
+++ b/src/s16-vlshift/gen/neon-x32.c
@@ -20,15 +20,15 @@
void xnn_s16_vlshift_ukernel__neon_x32(
size_t batch,
const int16_t* input,
- uint32_t shift,
- int16_t* output) {
-
+ int16_t* output,
+ uint32_t shift)
+{
assert(batch > 0);
assert(input != NULL);
- assert(shift < 16);
assert(output != NULL);
+ assert(shift < 16);
- const int16x8_t vshift = vdupq_n_s16(shift);
+ const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
for (; batch >= 32; batch -= 32) {
const int16x8_t vi0 = vld1q_s16(input); input += 8;
diff --git a/src/s16-vlshift/gen/neon-x8.c b/src/s16-vlshift/gen/neon-x8.c
index d930b9866..62290c910 100644
--- a/src/s16-vlshift/gen/neon-x8.c
+++ b/src/s16-vlshift/gen/neon-x8.c
@@ -20,15 +20,15 @@
void xnn_s16_vlshift_ukernel__neon_x8(
size_t batch,
const int16_t* input,
- uint32_t shift,
- int16_t* output) {
-
+ int16_t* output,
+ uint32_t shift)
+{
assert(batch > 0);
assert(input != NULL);
- assert(shift < 16);
assert(output != NULL);
+ assert(shift < 16);
- const int16x8_t vshift = vdupq_n_s16(shift);
+ const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
// Remainder of full vectors
diff --git a/src/s16-vlshift/gen/scalar-x1.c b/src/s16-vlshift/gen/scalar-x1.c
index bb40fe22f..fbc008be0 100644
--- a/src/s16-vlshift/gen/scalar-x1.c
+++ b/src/s16-vlshift/gen/scalar-x1.c
@@ -18,14 +18,13 @@
void xnn_s16_vlshift_ukernel__scalar_x1(
size_t batch,
const int16_t* input,
- uint32_t shift,
- int16_t* output) {
-
+ int16_t* output,
+ uint32_t shift)
+{
assert(batch != 0);
assert(input != NULL);
- assert(shift < 16);
assert(output != NULL);
-
+ assert(shift < 16);
if XNN_UNLIKELY(batch != 0) {
do {
diff --git a/src/s16-vlshift/gen/scalar-x2.c b/src/s16-vlshift/gen/scalar-x2.c
index 13ffac72f..d9052d7bb 100644
--- a/src/s16-vlshift/gen/scalar-x2.c
+++ b/src/s16-vlshift/gen/scalar-x2.c
@@ -18,13 +18,13 @@
void xnn_s16_vlshift_ukernel__scalar_x2(
size_t batch,
const int16_t* input,
- uint32_t shift,
- int16_t* output) {
-
+ int16_t* output,
+ uint32_t shift)
+{
assert(batch != 0);
assert(input != NULL);
- assert(shift < 16);
assert(output != NULL);
+ assert(shift < 16);
for (; batch >= 2; batch -= 2) {
const uint16_t vi0 = (uint16_t) input[0];
@@ -38,7 +38,6 @@ void xnn_s16_vlshift_ukernel__scalar_x2(
output[1] = (int16_t) vout1;
output += 2;
}
-
if XNN_UNLIKELY(batch != 0) {
do {
const uint16_t vi = (uint16_t) *input++;
diff --git a/src/s16-vlshift/gen/scalar-x3.c b/src/s16-vlshift/gen/scalar-x3.c
index 68b729304..55ccc626c 100644
--- a/src/s16-vlshift/gen/scalar-x3.c
+++ b/src/s16-vlshift/gen/scalar-x3.c
@@ -18,13 +18,13 @@
void xnn_s16_vlshift_ukernel__scalar_x3(
size_t batch,
const int16_t* input,
- uint32_t shift,
- int16_t* output) {
-
+ int16_t* output,
+ uint32_t shift)
+{
assert(batch != 0);
assert(input != NULL);
- assert(shift < 16);
assert(output != NULL);
+ assert(shift < 16);
for (; batch >= 3; batch -= 3) {
const uint16_t vi0 = (uint16_t) input[0];
@@ -41,7 +41,6 @@ void xnn_s16_vlshift_ukernel__scalar_x3(
output[2] = (int16_t) vout2;
output += 3;
}
-
if XNN_UNLIKELY(batch != 0) {
do {
const uint16_t vi = (uint16_t) *input++;
diff --git a/src/s16-vlshift/gen/scalar-x4.c b/src/s16-vlshift/gen/scalar-x4.c
index 3e725b8e0..326c7b01c 100644
--- a/src/s16-vlshift/gen/scalar-x4.c
+++ b/src/s16-vlshift/gen/scalar-x4.c
@@ -18,13 +18,13 @@
void xnn_s16_vlshift_ukernel__scalar_x4(
size_t batch,
const int16_t* input,
- uint32_t shift,
- int16_t* output) {
-
+ int16_t* output,
+ uint32_t shift)
+{
assert(batch != 0);
assert(input != NULL);
- assert(shift < 16);
assert(output != NULL);
+ assert(shift < 16);
for (; batch >= 4; batch -= 4) {
const uint16_t vi0 = (uint16_t) input[0];
@@ -44,7 +44,6 @@ void xnn_s16_vlshift_ukernel__scalar_x4(
output[3] = (int16_t) vout3;
output += 4;
}
-
if XNN_UNLIKELY(batch != 0) {
do {
const uint16_t vi = (uint16_t) *input++;
diff --git a/src/s16-vlshift/neon.c.in b/src/s16-vlshift/neon.c.in
index 46513cd07..cec559653 100644
--- a/src/s16-vlshift/neon.c.in
+++ b/src/s16-vlshift/neon.c.in
@@ -19,15 +19,15 @@ $SIMD_TILE = BATCH_TILE // 8
void xnn_s16_vlshift_ukernel__neon_x${BATCH_TILE}(
size_t batch,
const int16_t* input,
- uint32_t shift,
- int16_t* output) {
-
+ int16_t* output,
+ uint32_t shift)
+{
assert(batch > 0);
assert(input != NULL);
- assert(shift < 16);
assert(output != NULL);
+ assert(shift < 16);
- const int16x8_t vshift = vdupq_n_s16(shift);
+ const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
$if BATCH_TILE > 8:
for (; batch >= ${BATCH_TILE}; batch -= ${BATCH_TILE}) {
diff --git a/src/s16-vlshift/scalar.c.in b/src/s16-vlshift/scalar.c.in
index 1f519380c..e59918d20 100644
--- a/src/s16-vlshift/scalar.c.in
+++ b/src/s16-vlshift/scalar.c.in
@@ -15,13 +15,13 @@ $assert BATCH_TILE >= 1
void xnn_s16_vlshift_ukernel__scalar_x${BATCH_TILE}(
size_t batch,
const int16_t* input,
- uint32_t shift,
- int16_t* output) {
-
+ int16_t* output,
+ uint32_t shift)
+{
assert(batch != 0);
assert(input != NULL);
- assert(shift < 16);
assert(output != NULL);
+ assert(shift < 16);
$if BATCH_TILE > 1:
for (; batch >= ${BATCH_TILE}; batch -= ${BATCH_TILE}) {
@@ -36,7 +36,6 @@ void xnn_s16_vlshift_ukernel__scalar_x${BATCH_TILE}(
output[${C}] = (int16_t) vout${C};
output += ${BATCH_TILE};
}
-
if XNN_UNLIKELY(batch != 0) {
do {
const uint16_t vi = (uint16_t) *input++;
diff --git a/src/xnnpack/microfnptr.h b/src/xnnpack/microfnptr.h
index 6499eba53..feeb633d8 100644
--- a/src/xnnpack/microfnptr.h
+++ b/src/xnnpack/microfnptr.h
@@ -1656,10 +1656,10 @@ typedef void (*xnn_u32_filterbank_subtract_ukernel_function)(
uint32_t* output);
typedef void (*xnn_s16_vlshift_ukernel_function)(
- size_t batch_size,
+ size_t batch,
const int16_t* input,
- uint32_t shift,
- int16_t* output);
+ int16_t* output,
+ uint32_t shift);
typedef void (*xnn_cs16_vsquareabs_ukernel_function)(
size_t batch_size,
diff --git a/src/xnnpack/vlshift.h b/src/xnnpack/vlshift.h
index 31f3be488..cba428f67 100644
--- a/src/xnnpack/vlshift.h
+++ b/src/xnnpack/vlshift.h
@@ -17,10 +17,10 @@ extern "C" {
#define DECLARE_S16_VLSHIFT_UKERNEL_FUNCTION(fn_name) \
XNN_INTERNAL void fn_name( \
- size_t batch_size, \
+ size_t batch, \
const int16_t* input, \
- uint32_t shift, \
- int16_t* output);
+ int16_t* output, \
+ uint32_t shift);
DECLARE_S16_VLSHIFT_UKERNEL_FUNCTION(xnn_s16_vlshift_ukernel__neon_x8)
diff --git a/test/vlshift-microkernel-tester.h b/test/vlshift-microkernel-tester.h
index 6a65d0d26..7772281b1 100644
--- a/test/vlshift-microkernel-tester.h
+++ b/test/vlshift-microkernel-tester.h
@@ -82,7 +82,7 @@ class VLShiftMicrokernelTester {
}
// Call optimized micro-kernel.
- vlshift(batch(), x_data, shift(), y.data());
+ vlshift(batch(), x_data, y.data(), shift());
// Verify results.
for (size_t n = 0; n < batch(); n++) {
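For context, a minimal sketch of calling one of these ukernels with the reordered signature (batch, input, output, shift). The function name, header path, and argument order are taken from the diff above; the batch size, data values, and shift amount are illustrative only, and these kernels are internal APIs normally reached through XNNPACK's benchmarks and tests rather than called directly:

#include <stdint.h>

#include <xnnpack/vlshift.h>

int main(void) {
  int16_t input[4] = {1, 2, 3, 4};
  int16_t output[4];
  // batch counts int16_t elements; the kernel asserts batch != 0 and shift < 16.
  // New argument order: the output pointer now precedes the shift amount.
  xnn_s16_vlshift_ukernel__scalar_x4(/*batch=*/4, input, output, /*shift=*/4);
  return 0;
}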