author     Marat Dukhan <maratek@google.com>               2022-08-21 23:02:52 -0700
committer  XNNPACK Team <xnnpack-github-robot@google.com>  2022-08-21 23:03:54 -0700
commit     0e8f14528965865194a82198c9b621044289fe20 (patch)
tree       17060e949f3eda4947c489b0910fe82a345e54cf /src
parent     603b49e79d05d296e4229ac4f6fdaa0a2b0532f3 (diff)
download   XNNPACK-0e8f14528965865194a82198c9b621044289fe20.tar.gz
Unify VLSHIFT interface with other VUNARY microkernels
PiperOrigin-RevId: 469101809
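This makes the s16 VLSHIFT microkernels take their arguments in the same
order as the other VUNARY microkernels: the output pointer now precedes the
auxiliary shift argument, batch_size becomes batch in the declarations, and
the NEON kernels cast shift explicitly before broadcasting it with
vdupq_n_s16(). A minimal calling sketch against the new signature follows;
the buffers and shift value are illustrative only, not part of this patch:

    #include <stddef.h>
    #include <stdint.h>

    // Unified VUNARY-style argument order: (batch, input, output, shift).
    extern void xnn_s16_vlshift_ukernel__scalar_x4(
        size_t batch, const int16_t* input, int16_t* output, uint32_t shift);

    int main(void) {
      int16_t input[8] = {1, -2, 3, -4, 5, -6, 7, -8};
      int16_t output[8];
      // Shift all 8 elements left by 3 bits; the kernel asserts shift < 16.
      xnn_s16_vlshift_ukernel__scalar_x4(8, input, output, 3);
      return 0;
    }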
Diffstat (limited to 'src')
-rw-r--r--  src/s16-vlshift/gen/neon-x16.c   | 10
-rw-r--r--  src/s16-vlshift/gen/neon-x24.c   | 10
-rw-r--r--  src/s16-vlshift/gen/neon-x32.c   | 10
-rw-r--r--  src/s16-vlshift/gen/neon-x8.c    | 10
-rw-r--r--  src/s16-vlshift/gen/scalar-x1.c  |  9
-rw-r--r--  src/s16-vlshift/gen/scalar-x2.c  |  9
-rw-r--r--  src/s16-vlshift/gen/scalar-x3.c  |  9
-rw-r--r--  src/s16-vlshift/gen/scalar-x4.c  |  9
-rw-r--r--  src/s16-vlshift/neon.c.in        | 10
-rw-r--r--  src/s16-vlshift/scalar.c.in      |  9
-rw-r--r--  src/xnnpack/microfnptr.h         |  6
-rw-r--r--  src/xnnpack/vlshift.h            |  6
12 files changed, 51 insertions, 56 deletions
diff --git a/src/s16-vlshift/gen/neon-x16.c b/src/s16-vlshift/gen/neon-x16.c
index a2b3d9c83..2e2ca6a16 100644
--- a/src/s16-vlshift/gen/neon-x16.c
+++ b/src/s16-vlshift/gen/neon-x16.c
@@ -20,15 +20,15 @@
 void xnn_s16_vlshift_ukernel__neon_x16(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch > 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
-  const int16x8_t vshift = vdupq_n_s16(shift);
+  const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
 
   for (; batch >= 16; batch -= 16) {
     const int16x8_t vi0 = vld1q_s16(input); input += 8;
diff --git a/src/s16-vlshift/gen/neon-x24.c b/src/s16-vlshift/gen/neon-x24.c
index 91f8cb690..1e66c8f61 100644
--- a/src/s16-vlshift/gen/neon-x24.c
+++ b/src/s16-vlshift/gen/neon-x24.c
@@ -20,15 +20,15 @@
 void xnn_s16_vlshift_ukernel__neon_x24(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch > 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
-  const int16x8_t vshift = vdupq_n_s16(shift);
+  const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
 
   for (; batch >= 24; batch -= 24) {
     const int16x8_t vi0 = vld1q_s16(input); input += 8;
diff --git a/src/s16-vlshift/gen/neon-x32.c b/src/s16-vlshift/gen/neon-x32.c
index 2c50a9067..0cd66636a 100644
--- a/src/s16-vlshift/gen/neon-x32.c
+++ b/src/s16-vlshift/gen/neon-x32.c
@@ -20,15 +20,15 @@
 void xnn_s16_vlshift_ukernel__neon_x32(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch > 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
-  const int16x8_t vshift = vdupq_n_s16(shift);
+  const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
 
   for (; batch >= 32; batch -= 32) {
     const int16x8_t vi0 = vld1q_s16(input); input += 8;
diff --git a/src/s16-vlshift/gen/neon-x8.c b/src/s16-vlshift/gen/neon-x8.c
index d930b9866..62290c910 100644
--- a/src/s16-vlshift/gen/neon-x8.c
+++ b/src/s16-vlshift/gen/neon-x8.c
@@ -20,15 +20,15 @@
 void xnn_s16_vlshift_ukernel__neon_x8(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch > 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
-  const int16x8_t vshift = vdupq_n_s16(shift);
+  const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
 
   // Remainder of full vectors
   for (; batch >= 8; batch -= 8) {
diff --git a/src/s16-vlshift/gen/scalar-x1.c b/src/s16-vlshift/gen/scalar-x1.c
index bb40fe22f..fbc008be0 100644
--- a/src/s16-vlshift/gen/scalar-x1.c
+++ b/src/s16-vlshift/gen/scalar-x1.c
@@ -18,14 +18,13 @@
 void xnn_s16_vlshift_ukernel__scalar_x1(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch != 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
-
+  assert(shift < 16);
 
   if XNN_UNLIKELY(batch != 0) {
     do {
diff --git a/src/s16-vlshift/gen/scalar-x2.c b/src/s16-vlshift/gen/scalar-x2.c
index 13ffac72f..d9052d7bb 100644
--- a/src/s16-vlshift/gen/scalar-x2.c
+++ b/src/s16-vlshift/gen/scalar-x2.c
@@ -18,13 +18,13 @@
 void xnn_s16_vlshift_ukernel__scalar_x2(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch != 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
   for (; batch >= 2; batch -= 2) {
     const uint16_t vi0 = (uint16_t) input[0];
@@ -38,7 +38,6 @@ void xnn_s16_vlshift_ukernel__scalar_x2(
     output[1] = (int16_t) vout1;
     output += 2;
   }
-
   if XNN_UNLIKELY(batch != 0) {
     do {
       const uint16_t vi = (uint16_t) *input++;
diff --git a/src/s16-vlshift/gen/scalar-x3.c b/src/s16-vlshift/gen/scalar-x3.c
index 68b729304..55ccc626c 100644
--- a/src/s16-vlshift/gen/scalar-x3.c
+++ b/src/s16-vlshift/gen/scalar-x3.c
@@ -18,13 +18,13 @@
 void xnn_s16_vlshift_ukernel__scalar_x3(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch != 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
   for (; batch >= 3; batch -= 3) {
     const uint16_t vi0 = (uint16_t) input[0];
@@ -41,7 +41,6 @@ void xnn_s16_vlshift_ukernel__scalar_x3(
     output[2] = (int16_t) vout2;
     output += 3;
   }
-
   if XNN_UNLIKELY(batch != 0) {
     do {
       const uint16_t vi = (uint16_t) *input++;
diff --git a/src/s16-vlshift/gen/scalar-x4.c b/src/s16-vlshift/gen/scalar-x4.c
index 3e725b8e0..326c7b01c 100644
--- a/src/s16-vlshift/gen/scalar-x4.c
+++ b/src/s16-vlshift/gen/scalar-x4.c
@@ -18,13 +18,13 @@
 void xnn_s16_vlshift_ukernel__scalar_x4(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch != 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
   for (; batch >= 4; batch -= 4) {
     const uint16_t vi0 = (uint16_t) input[0];
@@ -44,7 +44,6 @@ void xnn_s16_vlshift_ukernel__scalar_x4(
     output[3] = (int16_t) vout3;
     output += 4;
   }
-
   if XNN_UNLIKELY(batch != 0) {
     do {
       const uint16_t vi = (uint16_t) *input++;
diff --git a/src/s16-vlshift/neon.c.in b/src/s16-vlshift/neon.c.in
index 46513cd07..cec559653 100644
--- a/src/s16-vlshift/neon.c.in
+++ b/src/s16-vlshift/neon.c.in
@@ -19,15 +19,15 @@ $SIMD_TILE = BATCH_TILE // 8
 void xnn_s16_vlshift_ukernel__neon_x${BATCH_TILE}(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch > 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
-  const int16x8_t vshift = vdupq_n_s16(shift);
+  const int16x8_t vshift = vdupq_n_s16((int16_t) shift);
 
   $if BATCH_TILE > 8:
     for (; batch >= ${BATCH_TILE}; batch -= ${BATCH_TILE}) {
diff --git a/src/s16-vlshift/scalar.c.in b/src/s16-vlshift/scalar.c.in
index 1f519380c..e59918d20 100644
--- a/src/s16-vlshift/scalar.c.in
+++ b/src/s16-vlshift/scalar.c.in
@@ -15,13 +15,13 @@ $assert BATCH_TILE >= 1
 void xnn_s16_vlshift_ukernel__scalar_x${BATCH_TILE}(
     size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output) {
-
+    int16_t* output,
+    uint32_t shift)
+{
   assert(batch != 0);
   assert(input != NULL);
-  assert(shift < 16);
   assert(output != NULL);
+  assert(shift < 16);
 
   $if BATCH_TILE > 1:
     for (; batch >= ${BATCH_TILE}; batch -= ${BATCH_TILE}) {
@@ -36,7 +36,6 @@ void xnn_s16_vlshift_ukernel__scalar_x${BATCH_TILE}(
       output[${C}] = (int16_t) vout${C};
       output += ${BATCH_TILE};
     }
-
   if XNN_UNLIKELY(batch != 0) {
     do {
       const uint16_t vi = (uint16_t) *input++;
diff --git a/src/xnnpack/microfnptr.h b/src/xnnpack/microfnptr.h
index 6499eba53..feeb633d8 100644
--- a/src/xnnpack/microfnptr.h
+++ b/src/xnnpack/microfnptr.h
@@ -1656,10 +1656,10 @@ typedef void (*xnn_u32_filterbank_subtract_ukernel_function)(
     uint32_t* output);
 
 typedef void (*xnn_s16_vlshift_ukernel_function)(
-    size_t batch_size,
+    size_t batch,
     const int16_t* input,
-    uint32_t shift,
-    int16_t* output);
+    int16_t* output,
+    uint32_t shift);
 
 typedef void (*xnn_cs16_vsquareabs_ukernel_function)(
     size_t batch_size,
diff --git a/src/xnnpack/vlshift.h b/src/xnnpack/vlshift.h
index 31f3be488..cba428f67 100644
--- a/src/xnnpack/vlshift.h
+++ b/src/xnnpack/vlshift.h
@@ -17,10 +17,10 @@ extern "C" {
 #endif
 
 #define DECLARE_S16_VLSHIFT_UKERNEL_FUNCTION(fn_name) \
   XNN_INTERNAL void fn_name(                          \
-      size_t batch_size,                              \
+      size_t batch,                                   \
       const int16_t* input,                           \
-      uint32_t shift,                                 \
-      int16_t* output);
+      int16_t* output,                                \
+      uint32_t shift);
 
 DECLARE_S16_VLSHIFT_UKERNEL_FUNCTION(xnn_s16_vlshift_ukernel__neon_x8)