aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--compute_ref.c2
-rw-r--r--ref-rvct.txt40
-rw-r--r--ref_vsXi_n.c108
-rw-r--r--ref_vsli_n.c70
-rw-r--r--ref_vsri_n.c61
6 files changed, 236 insertions, 47 deletions
diff --git a/Makefile b/Makefile
index c389e77..f9bfae7 100644
--- a/Makefile
+++ b/Makefile
@@ -127,7 +127,7 @@ ref_vqdmlsl.*.o: ref_vqdmlal.c
ref_vqdmlsl_lane.*.o: ref_vqdmlal_lane.c
ref_vqdmlsl_n.*.o: ref_vqdmlal_n.c
ref_vtrn.*.o ref_vzip.*.o: ref_vuzp.c
-ref_vsri_n.*.o: ref_vsli_n.c
+ref_vsli_n.*.o ref_vsri_n.*.o: ref_vsXi_n.c
ref_vraddhn.*.o ref_vsubhn.*.o ref_vrsubhn.*.o: ref_vaddhn.c
ref_vsubl.*.o: ref_vaddl.c
ref_vsubw.*.o: ref_vaddw.c
diff --git a/compute_ref.c b/compute_ref.c
index e1109b9..6ec7724 100644
--- a/compute_ref.c
+++ b/compute_ref.c
@@ -287,10 +287,8 @@ int main ()
exec_vmls_lane ();
exec_vmla_n ();
exec_vmls_n ();
-#if 0
exec_vsli_n ();
exec_vsri_n ();
-#endif
exec_vtst ();
exec_vaddhn ();
exec_vraddhn ();
diff --git a/ref-rvct.txt b/ref-rvct.txt
index 8c95fbd..c1ab223 100644
--- a/ref-rvct.txt
+++ b/ref-rvct.txt
@@ -4203,6 +4203,26 @@ VSLI_N:15:result_uint32x4 [] = { 1bfffff0, 1bfffff1, 1bfffff2, 1bfffff3, }
VSLI_N:16:result_uint64x2 [] = { 7ffffffffffff0, 7ffffffffffff1, }
VSLI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, }
+VSLI_Nmax shift amount output:
+VSLI_N:0:result_int8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, }
+VSLI_N:1:result_int16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, }
+VSLI_N:2:result_int32x2 [] = { fffffff0, fffffff1, }
+VSLI_N:3:result_int64x1 [] = { 7ffffffffffffff0, }
+VSLI_N:4:result_uint8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, }
+VSLI_N:5:result_uint16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, }
+VSLI_N:6:result_uint32x2 [] = { 7ffffff0, 7ffffff1, }
+VSLI_N:7:result_uint64x1 [] = { 7ffffffffffffff0, }
+VSLI_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, }
+VSLI_N:9:result_int8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, }
+VSLI_N:10:result_int16x8 [] = { 7ff0, 7ff1, 7ff2, 7ff3, 7ff4, 7ff5, 7ff6, 7ff7, }
+VSLI_N:11:result_int32x4 [] = { 7ffffff0, 7ffffff1, 7ffffff2, 7ffffff3, }
+VSLI_N:12:result_int64x2 [] = { 7ffffffffffffff0, 7ffffffffffffff1, }
+VSLI_N:13:result_uint8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, }
+VSLI_N:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, }
+VSLI_N:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, }
+VSLI_N:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, }
+VSLI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, }
+
VSRI_N output:
VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, }
VSRI_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, }
@@ -4223,6 +4243,26 @@ VSRI_N:15:result_uint32x4 [] = { fffffe00, fffffe00, fffffe00, fffffe00, }
VSRI_N:16:result_uint64x2 [] = { fffffffffffff800, fffffffffffff800, }
VSRI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, }
+VSRI_Nmax shift amount output:
+VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, }
+VSRI_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, }
+VSRI_N:2:result_int32x2 [] = { fffffff0, fffffff1, }
+VSRI_N:3:result_int64x1 [] = { fffffffffffffff0, }
+VSRI_N:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, }
+VSRI_N:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, }
+VSRI_N:6:result_uint32x2 [] = { fffffff0, fffffff1, }
+VSRI_N:7:result_uint64x1 [] = { fffffffffffffff0, }
+VSRI_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, }
+VSRI_N:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, }
+VSRI_N:10:result_int16x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, }
+VSRI_N:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, }
+VSRI_N:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, }
+VSRI_N:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, }
+VSRI_N:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, }
+VSRI_N:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, }
+VSRI_N:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, }
+VSRI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, }
+
VTST/VTSTQ (signed input) output:
VTST/VTSTQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, }
VTST/VTSTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, }
diff --git a/ref_vsXi_n.c b/ref_vsXi_n.c
new file mode 100644
index 0000000..20f207a
--- /dev/null
+++ b/ref_vsXi_n.c
@@ -0,0 +1,108 @@
+/*
+
+Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Written by Christophe Lyon
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#ifdef __arm__
+#include <arm_neon.h>
+#else
+#error Target not supported
+#endif
+
+#include "stm-arm-neon-ref.h"
+
+#ifndef INSN_NAME
+#define INSN_NAME vsli
+#define TEST_MSG "VSLI_N"
+#endif
+
+#define FNNAME1(NAME) void exec_ ## NAME ##_n (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+ /* vector_res = vsxi_n(vector, vector2, val),
+ then store the result. */
+#define TEST_VSXI_N1(INSN, Q, T1, T2, W, N, V) \
+ VECT_VAR(vector_res, T1, W, N) = \
+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \
+ VECT_VAR(vector2, T1, W, N), \
+ V); \
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VSXI_N(INSN, Q, T1, T2, W, N, V) \
+ TEST_VSXI_N1(INSN, Q, T1, T2, W, N, V)
+
+ /* With ARM RVCT, we need to declare variables before any executable
+ statement */
+ DECL_VARIABLE_ALL_VARIANTS(vector);
+ DECL_VARIABLE_ALL_VARIANTS(vector2);
+ DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+ clean_results ();
+
+ /* Initialize input "vector" from "buffer" */
+ TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer);
+
+ /* Fill input vector2 with arbitrary values */
+ TEST_VDUP(vector2, , int, s, 8, 8, 2);
+ TEST_VDUP(vector2, , int, s, 16, 4, -4);
+ TEST_VDUP(vector2, , int, s, 32, 2, 3);
+ TEST_VDUP(vector2, , int, s, 64, 1, 100);
+ TEST_VDUP(vector2, , uint, u, 8, 8, 20);
+ TEST_VDUP(vector2, , uint, u, 16, 4, 30);
+ TEST_VDUP(vector2, , uint, u, 32, 2, 40);
+ TEST_VDUP(vector2, , uint, u, 64, 1, 2);
+ TEST_VDUP(vector2, q, int, s, 8, 16, -10);
+ TEST_VDUP(vector2, q, int, s, 16, 8, -20);
+ TEST_VDUP(vector2, q, int, s, 32, 4, -30);
+ TEST_VDUP(vector2, q, int, s, 64, 2, 24);
+ TEST_VDUP(vector2, q, uint, u, 8, 16, 12);
+ TEST_VDUP(vector2, q, uint, u, 16, 8, 3);
+ TEST_VDUP(vector2, q, uint, u, 32, 4, 55);
+ TEST_VDUP(vector2, q, uint, u, 64, 2, 3);
+
+ /* Choose shift amount arbitrarily */
+ TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 4);
+ TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 3);
+ TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 1);
+ TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 32);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 2);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 10);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 30);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 3);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 5);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 3);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 20);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 16);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 3);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 12);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 23);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 53);
+
+ dump_results_hex (TEST_MSG);
+
+#ifdef EXTRA_TESTS
+ EXTRA_TESTS();
+#endif
+}
diff --git a/ref_vsli_n.c b/ref_vsli_n.c
index 2666af6..5f6ed23 100644
--- a/ref_vsli_n.c
+++ b/ref_vsli_n.c
@@ -23,35 +23,19 @@ THE SOFTWARE.
*/
-#ifdef __arm__
-#include <arm_neon.h>
-#else
-#error Target not supported
-#endif
-
-#include "stm-arm-neon-ref.h"
-
-#ifndef INSN_NAME
#define INSN_NAME vsli
#define TEST_MSG "VSLI_N"
-#endif
-#define FNNAME1(NAME) void exec_ ## NAME ##_n (void)
-#define FNNAME(NAME) FNNAME1(NAME)
+/* Extra tests for functions requiring corner cases tests */
+void vsli_extra(void);
+#define EXTRA_TESTS vsli_extra
-FNNAME (INSN_NAME)
-{
- /* vector_res = vmlx_n(vector, vector2, val),
- then store the result. */
-#define TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) \
- VECT_VAR(vector_res, T1, W, N) = \
- INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \
- VECT_VAR(vector2, T1, W, N), \
- V); \
- vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+#include "ref_vsXi_n.c"
-#define TEST_VMLX_N(INSN, Q, T1, T2, W, N, V) \
- TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V)
+void vsli_extra(void)
+{
+ /* Test cases with maximum shift amount (this amount is different
+ * from vsri). */
/* With ARM RVCT, we need to declare variables before any executable
statement */
@@ -82,23 +66,23 @@ FNNAME (INSN_NAME)
TEST_VDUP(vector2, q, uint, u, 32, 4, 55);
TEST_VDUP(vector2, q, uint, u, 64, 2, 3);
- /* Choose shift amount arbitrarily */
- TEST_VMLX_N(INSN_NAME, , int, s, 8, 8, 4);
- TEST_VMLX_N(INSN_NAME, , int, s, 16, 4, 3);
- TEST_VMLX_N(INSN_NAME, , int, s, 32, 2, 1);
- TEST_VMLX_N(INSN_NAME, , int, s, 64, 1, 32);
- TEST_VMLX_N(INSN_NAME, , uint, u, 8, 8, 2);
- TEST_VMLX_N(INSN_NAME, , uint, u, 16, 4, 10);
- TEST_VMLX_N(INSN_NAME, , uint, u, 32, 2, 30);
- TEST_VMLX_N(INSN_NAME, , uint, u, 64, 1, 3);
- TEST_VMLX_N(INSN_NAME, q, int, s, 8, 16, 5);
- TEST_VMLX_N(INSN_NAME, q, int, s, 16, 8, 3);
- TEST_VMLX_N(INSN_NAME, q, int, s, 32, 4, 20);
- TEST_VMLX_N(INSN_NAME, q, int, s, 64, 2, 16);
- TEST_VMLX_N(INSN_NAME, q, uint, u, 8, 16, 3);
- TEST_VMLX_N(INSN_NAME, q, uint, u, 16, 8, 12);
- TEST_VMLX_N(INSN_NAME, q, uint, u, 32, 4, 23);
- TEST_VMLX_N(INSN_NAME, q, uint, u, 64, 2, 53);
-
- dump_results_hex (TEST_MSG);
+ /* Use maximum allowed shift amount */
+ TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 7);
+ TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 15);
+ TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 31);
+ TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 63);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 7);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 15);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 31);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 63);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 7);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 15);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 31);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 63);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 7);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 15);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 31);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 63);
+
+ dump_results_hex2 (TEST_MSG, "max shift amount");
}
diff --git a/ref_vsri_n.c b/ref_vsri_n.c
index a4e916a..90ed127 100644
--- a/ref_vsri_n.c
+++ b/ref_vsri_n.c
@@ -26,4 +26,63 @@ THE SOFTWARE.
#define INSN_NAME vsri
#define TEST_MSG "VSRI_N"
-#include "ref_vsli_n.c"
+/* Extra tests for functions requiring corner cases tests */
+void vsri_extra(void);
+#define EXTRA_TESTS vsri_extra
+
+#include "ref_vsXi_n.c"
+
+void vsri_extra(void)
+{
+ /* Test cases with maximum shift amount (this amount is different
+ * from vsli). */
+
+ /* With ARM RVCT, we need to declare variables before any executable
+ statement */
+ DECL_VARIABLE_ALL_VARIANTS(vector);
+ DECL_VARIABLE_ALL_VARIANTS(vector2);
+ DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+ clean_results ();
+
+ /* Initialize input "vector" from "buffer" */
+ TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer);
+
+ /* Fill input vector2 with arbitrary values */
+ TEST_VDUP(vector2, , int, s, 8, 8, 2);
+ TEST_VDUP(vector2, , int, s, 16, 4, -4);
+ TEST_VDUP(vector2, , int, s, 32, 2, 3);
+ TEST_VDUP(vector2, , int, s, 64, 1, 100);
+ TEST_VDUP(vector2, , uint, u, 8, 8, 20);
+ TEST_VDUP(vector2, , uint, u, 16, 4, 30);
+ TEST_VDUP(vector2, , uint, u, 32, 2, 40);
+ TEST_VDUP(vector2, , uint, u, 64, 1, 2);
+ TEST_VDUP(vector2, q, int, s, 8, 16, -10);
+ TEST_VDUP(vector2, q, int, s, 16, 8, -20);
+ TEST_VDUP(vector2, q, int, s, 32, 4, -30);
+ TEST_VDUP(vector2, q, int, s, 64, 2, 24);
+ TEST_VDUP(vector2, q, uint, u, 8, 16, 12);
+ TEST_VDUP(vector2, q, uint, u, 16, 8, 3);
+ TEST_VDUP(vector2, q, uint, u, 32, 4, 55);
+ TEST_VDUP(vector2, q, uint, u, 64, 2, 3);
+
+ /* Use maximum allowed shift amount */
+ TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 8);
+ TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 16);
+ TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 32);
+ TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 64);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 8);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 16);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 32);
+ TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 64);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 8);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 16);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 32);
+ TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 64);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 8);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 16);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 32);
+ TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 64);
+
+ dump_results_hex2 (TEST_MSG, "max shift amount");
+}