about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Frank Barchard <fbarchard@google.com> 2022-08-22 11:14:28 -0700
committer: XNNPACK Team <xnnpack-github-robot@google.com> 2022-08-22 11:15:19 -0700
commit: 8025ee59c5c8e0446bada40d565e7660199a42bc (patch)
tree: afedee6dc8e0b352e6ecf971e30e8d0b31ba09b4
parent: bd223d2c4ab7f618b878292d275a5d7ee9bab038 (diff)
download: XNNPACK-8025ee59c5c8e0446bada40d565e7660199a42bc.tar.gz
bfly4m1 use a single pointer for data
PiperOrigin-RevId: 469235984
-rw-r--r-- src/cs16-bfly4/gen/scalar-m1-x1.c | 32
-rw-r--r-- src/cs16-bfly4/scalar.c.in | 68
2 files changed, 58 insertions, 42 deletions
diff --git a/src/cs16-bfly4/gen/scalar-m1-x1.c b/src/cs16-bfly4/gen/scalar-m1-x1.c
index 26361abd7..09c47ff40 100644
--- a/src/cs16-bfly4/gen/scalar-m1-x1.c
+++ b/src/cs16-bfly4/gen/scalar-m1-x1.c
@@ -22,9 +22,6 @@ void xnn_cs16_bfly4m1_ukernel__scalar_x1(
const int16_t* twiddle) {
int16_t* out0 = data;
- int16_t* out1 = data + samples * 2;
- int16_t* out2 = data + samples * 4;
- int16_t* out3 = data + samples * 6;
assert(samples == 1);
assert(data != NULL);
@@ -36,12 +33,12 @@ void xnn_cs16_bfly4m1_ukernel__scalar_x1(
do {
int32_t vout0r = (int32_t) out0[0];
int32_t vout0i = (int32_t) out0[1];
- int32_t vout1r = (int32_t) out1[0];
- int32_t vout1i = (int32_t) out1[1];
- int32_t vout2r = (int32_t) out2[0];
- int32_t vout2i = (int32_t) out2[1];
- int32_t vout3r = (int32_t) out3[0];
- int32_t vout3i = (int32_t) out3[1];
+ int32_t vout1r = (int32_t) out0[2];
+ int32_t vout1i = (int32_t) out0[3];
+ int32_t vout2r = (int32_t) out0[4];
+ int32_t vout2i = (int32_t) out0[5];
+ int32_t vout3r = (int32_t) out0[6];
+ int32_t vout3i = (int32_t) out0[7];
// Note 32767 / 4 = 8191. Should be 8192.
@@ -54,6 +51,7 @@ void xnn_cs16_bfly4m1_ukernel__scalar_x1(
vout3r = math_asr_s32(vout3r * 8191 + 16384, 15);
vout3i = math_asr_s32(vout3i * 8191 + 16384, 15);
+ // Note 32767 should be 32768 representing a multiply by 1.
const int32_t vtmp0r = math_asr_s32(vout1r * 32767 + 16384, 15);
const int32_t vtmp0i = math_asr_s32(vout1i * 32767 + 16384, 15);
const int32_t vtmp1r = math_asr_s32(vout2r * 32767 + 16384, 15);
@@ -82,16 +80,12 @@ void xnn_cs16_bfly4m1_ukernel__scalar_x1(
out0[0] = (int16_t) vout0r;
out0[1] = (int16_t) vout0i;
- out1[0] = (int16_t) vout1r;
- out1[1] = (int16_t) vout1i;
- out2[0] = (int16_t) vout2r;
- out2[1] = (int16_t) vout2i;
- out3[0] = (int16_t) vout3r;
- out3[1] = (int16_t) vout3i;
- out0 += 2;
- out1 += 2;
- out2 += 2;
- out3 += 2;
+ out0[2] = (int16_t) vout1r;
+ out0[3] = (int16_t) vout1i;
+ out0[4] = (int16_t) vout2r;
+ out0[5] = (int16_t) vout2i;
+ out0[6] = (int16_t) vout3r;
+ out0[7] = (int16_t) vout3i;
} while(--samples != 0);
}
}
diff --git a/src/cs16-bfly4/scalar.c.in b/src/cs16-bfly4/scalar.c.in
index 4f31c277f..23d5bfcbd 100644
--- a/src/cs16-bfly4/scalar.c.in
+++ b/src/cs16-bfly4/scalar.c.in
@@ -24,9 +24,10 @@ void xnn_cs16_bfly4${VARIANT}_ukernel__scalar_x${SAMPLE_TILE}(
const int16_t* tw2 = tw1;
const int16_t* tw3 = tw1;
int16_t* out0 = data;
- int16_t* out1 = data + samples * 2;
- int16_t* out2 = data + samples * 4;
- int16_t* out3 = data + samples * 6;
+ $if M != 1:
+ int16_t* out1 = data + samples * 2;
+ int16_t* out2 = data + samples * 4;
+ int16_t* out3 = data + samples * 6;
$if M != 0:
assert(samples == ${M});
@@ -148,14 +149,24 @@ void xnn_cs16_bfly4${VARIANT}_ukernel__scalar_x${SAMPLE_TILE}(
if XNN_UNLIKELY(samples != 0) {
do {
- int32_t vout0r = (int32_t) out0[0];
- int32_t vout0i = (int32_t) out0[1];
- int32_t vout1r = (int32_t) out1[0];
- int32_t vout1i = (int32_t) out1[1];
- int32_t vout2r = (int32_t) out2[0];
- int32_t vout2i = (int32_t) out2[1];
- int32_t vout3r = (int32_t) out3[0];
- int32_t vout3i = (int32_t) out3[1];
+ $if M == 1:
+ int32_t vout0r = (int32_t) out0[0];
+ int32_t vout0i = (int32_t) out0[1];
+ int32_t vout1r = (int32_t) out0[2];
+ int32_t vout1i = (int32_t) out0[3];
+ int32_t vout2r = (int32_t) out0[4];
+ int32_t vout2i = (int32_t) out0[5];
+ int32_t vout3r = (int32_t) out0[6];
+ int32_t vout3i = (int32_t) out0[7];
+ $else:
+ int32_t vout0r = (int32_t) out0[0];
+ int32_t vout0i = (int32_t) out0[1];
+ int32_t vout1r = (int32_t) out1[0];
+ int32_t vout1i = (int32_t) out1[1];
+ int32_t vout2r = (int32_t) out2[0];
+ int32_t vout2i = (int32_t) out2[1];
+ int32_t vout3r = (int32_t) out3[0];
+ int32_t vout3i = (int32_t) out3[1];
$if M != 1:
const int32_t vtw1r = (const int32_t) tw1[0];
@@ -179,6 +190,7 @@ void xnn_cs16_bfly4${VARIANT}_ukernel__scalar_x${SAMPLE_TILE}(
vout3i = math_asr_s32(vout3i * 8191 + 16384, 15);
$if M == 1:
+ // Note 32767 should be 32768 representing a multiply by 1.
const int32_t vtmp0r = math_asr_s32(vout1r * 32767 + 16384, 15);
const int32_t vtmp0i = math_asr_s32(vout1i * 32767 + 16384, 15);
const int32_t vtmp1r = math_asr_s32(vout2r * 32767 + 16384, 15);
@@ -212,18 +224,28 @@ void xnn_cs16_bfly4${VARIANT}_ukernel__scalar_x${SAMPLE_TILE}(
vout3r = vtmp5r - vtmp4i;
vout3i = vtmp5i + vtmp4r;
- out0[0] = (int16_t) vout0r;
- out0[1] = (int16_t) vout0i;
- out1[0] = (int16_t) vout1r;
- out1[1] = (int16_t) vout1i;
- out2[0] = (int16_t) vout2r;
- out2[1] = (int16_t) vout2i;
- out3[0] = (int16_t) vout3r;
- out3[1] = (int16_t) vout3i;
- out0 += 2;
- out1 += 2;
- out2 += 2;
- out3 += 2;
+ $if M == 1:
+ out0[0] = (int16_t) vout0r;
+ out0[1] = (int16_t) vout0i;
+ out0[2] = (int16_t) vout1r;
+ out0[3] = (int16_t) vout1i;
+ out0[4] = (int16_t) vout2r;
+ out0[5] = (int16_t) vout2i;
+ out0[6] = (int16_t) vout3r;
+ out0[7] = (int16_t) vout3i;
+ $else:
+ out0[0] = (int16_t) vout0r;
+ out0[1] = (int16_t) vout0i;
+ out1[0] = (int16_t) vout1r;
+ out1[1] = (int16_t) vout1i;
+ out2[0] = (int16_t) vout2r;
+ out2[1] = (int16_t) vout2i;
+ out3[0] = (int16_t) vout3r;
+ out3[1] = (int16_t) vout3i;
+ out0 += 2;
+ out1 += 2;
+ out2 += 2;
+ out3 += 2;
} while(--samples != 0);
}
}