author     Marat Dukhan <maratek@google.com>               2022-08-21 23:02:52 -0700
committer  XNNPACK Team <xnnpack-github-robot@google.com>  2022-08-21 23:03:54 -0700
commit     0e8f14528965865194a82198c9b621044289fe20 (patch)
tree       17060e949f3eda4947c489b0910fe82a345e54cf /src
parent     603b49e79d05d296e4229ac4f6fdaa0a2b0532f3 (diff)
download   XNNPACK-0e8f14528965865194a82198c9b621044289fe20.tar.gz
Unify VLSHIFT interface with other VUNARY microkernels
PiperOrigin-RevId: 469101809
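This makes the s16 VLSHIFT microkernels take their arguments in the same
order as the other VUNARY microkernels: the output pointer now precedes the
auxiliary shift argument, batch_size becomes batch in the declarations, and
the NEON kernels cast shift explicitly before broadcasting it with
vdupq_n_s16(). A minimal calling sketch against the new signature follows;
the buffers and shift value are illustrative only, not part of this patch:

    #include <stddef.h>
    #include <stdint.h>

    // Unified VUNARY-style argument order: (batch, input, output, shift).
    extern void xnn_s16_vlshift_ukernel__scalar_x4(
        size_t batch, const int16_t* input, int16_t* output, uint32_t shift);

    int main(void) {
      int16_t input[8] = {1, -2, 3, -4, 5, -6, 7, -8};
      int16_t output[8];
      // Shift all 8 elements left by 3 bits; the kernel asserts shift < 16.
      xnn_s16_vlshift_ukernel__scalar_x4(8, input, output, 3);
      return 0;
    }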
Diffstat (limited to 'src')
-rw-r--r--  src/s16-vlshift/gen/neon-x16.c   | 10
-rw-r--r--  src/s16-vlshift/gen/neon-x24.c   | 10
-rw-r--r--  src/s16-vlshift/gen/neon-x32.c   | 10
-rw-r--r--  src/s16-vlshift/gen/neon-x8.c    | 10
-rw-r--r--  src/s16-vlshift/gen/scalar-x1.c  |  9
-rw-r--r--  src/s16-vlshift/gen/scalar-x2.c  |  9
-rw-r--r--  src/s16-vlshift/gen/scalar-x3.c  |  9
-rw-r--r--  src/s16-vlshift/gen/scalar-x4.c  |  9
-rw-r--r--  src/s16-vlshift/neon.c.in        | 10
-rw-r--r--  src/s16-vlshift/scalar.c.in      |  9
-rw-r--r--  src/xnnpack/microfnptr.h         |  6
-rw-r--r--  src/xnnpack/vlshift.h            |  6
12 files changed, 51 insertions, 56 deletions
diff --git a/src/s16-vlshift/gen/neon-x16.c b/src/s16-vlshift/gen/neon-x16.c
index a2b3d9c83..2e2ca6a16 100644
--- a/src/s16-vlshift/gen/neon-x16.c
+++ b/src/s16-vlshift/gen/neon-x16.c
@@ -20,15 +20,15 @@
 void xnn_s16_vlshift_ukernel__neon_x16(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch > 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
-  const int16x8_t vshift = vdupq_n_s16(shift);
+  const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
 
   for (; batch >= 16; batch -= 16) {
     const int16x8_t vi0 = vld1q_s16(input); input += 8;
diff --git a/src/s16-vlshift/gen/neon-x24.c b/src/s16-vlshift/gen/neon-x24.c
index 91f8cb690..1e66c8f61 100644
--- a/src/s16-vlshift/gen/neon-x24.c
+++ b/src/s16-vlshift/gen/neon-x24.c
@@ -20,15 +20,15 @@
 void xnn_s16_vlshift_ukernel__neon_x24(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch > 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
-  const int16x8_t vshift = vdupq_n_s16(shift);
+  const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
 
   for (; batch >= 24; batch -= 24) {
     const int16x8_t vi0 = vld1q_s16(input); input += 8;
diff --git a/src/s16-vlshift/gen/neon-x32.c b/src/s16-vlshift/gen/neon-x32.c
index 2c50a9067..0cd66636a 100644
--- a/src/s16-vlshift/gen/neon-x32.c
+++ b/src/s16-vlshift/gen/neon-x32.c
@@ -20,15 +20,15 @@
 void xnn_s16_vlshift_ukernel__neon_x32(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch > 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
-  const int16x8_t vshift = vdupq_n_s16(shift);
+  const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
 
   for (; batch >= 32; batch -= 32) {
     const int16x8_t vi0 = vld1q_s16(input); input += 8;
diff --git a/src/s16-vlshift/gen/neon-x8.c b/src/s16-vlshift/gen/neon-x8.c
index d930b9866..62290c910 100644
--- a/src/s16-vlshift/gen/neon-x8.c
+++ b/src/s16-vlshift/gen/neon-x8.c
@@ -20,15 +20,15 @@
 void xnn_s16_vlshift_ukernel__neon_x8(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch > 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
-  const int16x8_t vshift = vdupq_n_s16(shift);
+  const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
 
   // Remainder of full vectors
   for (; batch >= 8; batch -= 8) {
diff --git a/src/s16-vlshift/gen/scalar-x1.c b/src/s16-vlshift/gen/scalar-x1.c
index bb40fe22f..fbc008be0 100644
--- a/src/s16-vlshift/gen/scalar-x1.c
+++ b/src/s16-vlshift/gen/scalar-x1.c
@@ -18,14 +18,13 @@
 void xnn_s16_vlshift_ukernel__scalar_x1(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch != 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
-
+  assert(shift < 16);
 
   if XNN_UNLIKELY(batch != 0) {
     do {
diff --git a/src/s16-vlshift/gen/scalar-x2.c b/src/s16-vlshift/gen/scalar-x2.c
index 13ffac72f..d9052d7bb 100644
--- a/src/s16-vlshift/gen/scalar-x2.c
+++ b/src/s16-vlshift/gen/scalar-x2.c
@@ -18,13 +18,13 @@
 void xnn_s16_vlshift_ukernel__scalar_x2(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch != 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
   for (; batch >= 2; batch -= 2) {
     const uint16_t vi0 = (uint16_t) input[0];
@@ -38,7 +38,6 @@ void xnn_s16_vlshift_ukernel__scalar_x2(
     output[1] = (int16_t) vout1;
     output += 2;
   }
-
   if XNN_UNLIKELY(batch != 0) {
     do {
       const uint16_t vi = (uint16_t) *input++;
diff --git a/src/s16-vlshift/gen/scalar-x3.c b/src/s16-vlshift/gen/scalar-x3.c
index 68b729304..55ccc626c 100644
--- a/src/s16-vlshift/gen/scalar-x3.c
+++ b/src/s16-vlshift/gen/scalar-x3.c
@@ -18,13 +18,13 @@
 void xnn_s16_vlshift_ukernel__scalar_x3(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch != 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
   for (; batch >= 3; batch -= 3) {
     const uint16_t vi0 = (uint16_t) input[0];
@@ -41,7 +41,6 @@ void xnn_s16_vlshift_ukernel__scalar_x3(
     output[2] = (int16_t) vout2;
     output += 3;
   }
-
   if XNN_UNLIKELY(batch != 0) {
     do {
       const uint16_t vi = (uint16_t) *input++;
diff --git a/src/s16-vlshift/gen/scalar-x4.c b/src/s16-vlshift/gen/scalar-x4.c
index 3e725b8e0..326c7b01c 100644
--- a/src/s16-vlshift/gen/scalar-x4.c
+++ b/src/s16-vlshift/gen/scalar-x4.c
@@ -18,13 +18,13 @@
 void xnn_s16_vlshift_ukernel__scalar_x4(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch != 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
   for (; batch >= 4; batch -= 4) {
     const uint16_t vi0 = (uint16_t) input[0];
@@ -44,7 +44,6 @@ void xnn_s16_vlshift_ukernel__scalar_x4(
     output[3] = (int16_t) vout3;
     output += 4;
   }
-
   if XNN_UNLIKELY(batch != 0) {
     do {
       const uint16_t vi = (uint16_t) *input++;
diff --git a/src/s16-vlshift/neon.c.in b/src/s16-vlshift/neon.c.in
index 46513cd07..cec559653 100644
--- a/src/s16-vlshift/neon.c.in
+++ b/src/s16-vlshift/neon.c.in
@@ -19,15 +19,15 @@ $SIMD_TILE = BATCH_TILE // 8
 void xnn_s16_vlshift_ukernel__neon_x${BATCH_TILE}(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch > 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
-  const int16x8_t vshift = vdupq_n_s16(shift);
+  const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
 
   $if BATCH_TILE > 8:
     for (; batch >= ${BATCH_TILE}; batch -= ${BATCH_TILE}) {
diff --git a/src/s16-vlshift/scalar.c.in b/src/s16-vlshift/scalar.c.in
index 1f519380c..e59918d20 100644
--- a/src/s16-vlshift/scalar.c.in
+++ b/src/s16-vlshift/scalar.c.in
@@ -15,13 +15,13 @@ $assert BATCH_TILE >= 1
 void xnn_s16_vlshift_ukernel__scalar_x${BATCH_TILE}(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch != 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
   $if BATCH_TILE > 1:
     for (; batch >= ${BATCH_TILE}; batch -= ${BATCH_TILE}) {
@@ -36,7 +36,6 @@ void xnn_s16_vlshift_ukernel__scalar_x${BATCH_TILE}(
       output[${C}] = (int16_t) vout${C};
       output += ${BATCH_TILE};
     }
-
   if XNN_UNLIKELY(batch != 0) {
     do {
       const uint16_t vi = (uint16_t) *input++;
diff --git a/src/xnnpack/microfnptr.h b/src/xnnpack/microfnptr.h
index 6499eba53..feeb633d8 100644
--- a/src/xnnpack/microfnptr.h
+++ b/src/xnnpack/microfnptr.h
@@ -1656,10 +1656,10 @@ typedef void (*xnn_u32_filterbank_subtract_ukernel_function)(
     uint32_t* output);
 
 typedef void (*xnn_s16_vlshift_ukernel_function)(
-    size_t batch_size,
+    size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output);
+    int16_t* output,
+    uint32_t shift);
 
 typedef void (*xnn_cs16_vsquareabs_ukernel_function)(
     size_t batch_size,
diff --git a/src/xnnpack/vlshift.h b/src/xnnpack/vlshift.h
index 31f3be488..cba428f67 100644
--- a/src/xnnpack/vlshift.h
+++ b/src/xnnpack/vlshift.h
@@ -17,10 +17,10 @@ extern "C" {
 #endif
 
 #define DECLARE_S16_VLSHIFT_UKERNEL_FUNCTION(fn_name) \
   XNN_INTERNAL void fn_name(                          \
-      size_t batch_size,                              \
+      size_t batch,                                   \
       const int16_t* input,                           \
-      uint32_t shift,                                 \
-      int16_t* output);
+      int16_t* output,                                \
+      uint32_t shift);
 
 DECLARE_S16_VLSHIFT_UKERNEL_FUNCTION(xnn_s16_vlshift_ukernel__neon_x8)