diff options
-rw-r--r--  Makefile      |   2
-rw-r--r--  compute_ref.c |   2
-rw-r--r--  ref-rvct.txt  |  40
-rw-r--r--  ref_vsXi_n.c  | 108
-rw-r--r--  ref_vsli_n.c  |  70
-rw-r--r--  ref_vsri_n.c  |  61
6 files changed, 236 insertions, 47 deletions
@@ -127,7 +127,7 @@ ref_vqdmlsl.*.o: ref_vqdmlal.c ref_vqdmlsl_lane.*.o: ref_vqdmlal_lane.c ref_vqdmlsl_n.*.o: ref_vqdmlal_n.c ref_vtrn.*.o ref_vzip.*.o: ref_vuzp.c -ref_vsri_n.*.o: ref_vsli_n.c +ref_vsli_n.*.o ref_vsri_n.*.o: ref_vsXi_n.c ref_vraddhn.*.o ref_vsubhn.*.o ref_vrsubhn.*.o: ref_vaddhn.c ref_vsubl.*.o: ref_vaddl.c ref_vsubw.*.o: ref_vaddw.c diff --git a/compute_ref.c b/compute_ref.c index e1109b9..6ec7724 100644 --- a/compute_ref.c +++ b/compute_ref.c @@ -287,10 +287,8 @@ int main () exec_vmls_lane (); exec_vmla_n (); exec_vmls_n (); -#if 0 exec_vsli_n (); exec_vsri_n (); -#endif exec_vtst (); exec_vaddhn (); exec_vraddhn (); diff --git a/ref-rvct.txt b/ref-rvct.txt index 8c95fbd..c1ab223 100644 --- a/ref-rvct.txt +++ b/ref-rvct.txt @@ -4203,6 +4203,26 @@ VSLI_N:15:result_uint32x4 [] = { 1bfffff0, 1bfffff1, 1bfffff2, 1bfffff3, } VSLI_N:16:result_uint64x2 [] = { 7ffffffffffff0, 7ffffffffffff1, } VSLI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSLI_Nmax shift amount output: +VSLI_N:0:result_int8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, } +VSLI_N:1:result_int16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, } +VSLI_N:2:result_int32x2 [] = { fffffff0, fffffff1, } +VSLI_N:3:result_int64x1 [] = { 7ffffffffffffff0, } +VSLI_N:4:result_uint8x8 [] = { 70, 71, 72, 73, 74, 75, 76, 77, } +VSLI_N:5:result_uint16x4 [] = { 7ff0, 7ff1, 7ff2, 7ff3, } +VSLI_N:6:result_uint32x2 [] = { 7ffffff0, 7ffffff1, } +VSLI_N:7:result_uint64x1 [] = { 7ffffffffffffff0, } +VSLI_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSLI_N:9:result_int8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, } +VSLI_N:10:result_int16x8 [] = { 7ff0, 7ff1, 7ff2, 7ff3, 7ff4, 7ff5, 7ff6, 7ff7, } +VSLI_N:11:result_int32x4 [] = { 7ffffff0, 7ffffff1, 7ffffff2, 7ffffff3, } +VSLI_N:12:result_int64x2 [] = { 
7ffffffffffffff0, 7ffffffffffffff1, } +VSLI_N:13:result_uint8x16 [] = { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 7a, 7b, 7c, 7d, 7e, 7f, } +VSLI_N:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSLI_N:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSLI_N:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSLI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + VSRI_N output: VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, fffffff0, } VSRI_N:1:result_int16x4 [] = { ffffffff, ffffffff, ffffffff, ffffffff, } @@ -4223,6 +4243,26 @@ VSRI_N:15:result_uint32x4 [] = { fffffe00, fffffe00, fffffe00, fffffe00, } VSRI_N:16:result_uint64x2 [] = { fffffffffffff800, fffffffffffff800, } VSRI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSRI_Nmax shift amount output: +VSRI_N:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRI_N:1:result_int16x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRI_N:2:result_int32x2 [] = { fffffff0, fffffff1, } +VSRI_N:3:result_int64x1 [] = { fffffffffffffff0, } +VSRI_N:4:result_uint8x8 [] = { f0, f1, f2, f3, f4, f5, f6, f7, } +VSRI_N:5:result_uint16x4 [] = { fff0, fff1, fff2, fff3, } +VSRI_N:6:result_uint32x2 [] = { fffffff0, fffffff1, } +VSRI_N:7:result_uint64x1 [] = { fffffffffffffff0, } +VSRI_N:8:result_float32x2 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } +VSRI_N:9:result_int8x16 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, } +VSRI_N:10:result_int16x8 
[] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, } +VSRI_N:11:result_int32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRI_N:12:result_int64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRI_N:13:result_uint8x16 [] = { f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, } +VSRI_N:14:result_uint16x8 [] = { fff0, fff1, fff2, fff3, fff4, fff5, fff6, fff7, } +VSRI_N:15:result_uint32x4 [] = { fffffff0, fffffff1, fffffff2, fffffff3, } +VSRI_N:16:result_uint64x2 [] = { fffffffffffffff0, fffffffffffffff1, } +VSRI_N:17:result_float32x4 [] = { 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, 33333333 0x1.666666p-25 4.17233e-08, } + VTST/VTSTQ (signed input) output: VTST/VTSTQ:0:result_int8x8 [] = { 33, 33, 33, 33, 33, 33, 33, 33, } VTST/VTSTQ:1:result_int16x4 [] = { 3333, 3333, 3333, 3333, } diff --git a/ref_vsXi_n.c b/ref_vsXi_n.c new file mode 100644 index 0000000..20f207a --- /dev/null +++ b/ref_vsXi_n.c @@ -0,0 +1,108 @@ +/* + +Copyright (c) 2009, 2010, 2011 STMicroelectronics +Written by Christophe Lyon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifdef __arm__ +#include <arm_neon.h> +#else +#error Target not supported +#endif + +#include "stm-arm-neon-ref.h" + +#ifndef INSN_NAME +#define INSN_NAME vsli +#define TEST_MSG "VSLI_N" +#endif + +#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN_NAME) +{ + /* vector_res = vsxi_n(vector, vector2, val), + then store the result. */ +#define TEST_VSXI_N1(INSN, Q, T1, T2, W, N, V) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VSXI_N(INSN, Q, T1, T2, W, N, V) \ + TEST_VSXI_N1(INSN, Q, T1, T2, W, N, V) + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Fill input vector2 with arbitrary values */ + TEST_VDUP(vector2, , int, s, 8, 8, 2); + TEST_VDUP(vector2, , int, s, 16, 4, -4); + TEST_VDUP(vector2, , int, s, 32, 2, 3); + TEST_VDUP(vector2, , int, s, 64, 1, 100); + TEST_VDUP(vector2, , uint, u, 8, 8, 20); + TEST_VDUP(vector2, , uint, u, 16, 4, 30); + TEST_VDUP(vector2, , uint, u, 32, 2, 40); + TEST_VDUP(vector2, , uint, u, 64, 1, 2); + TEST_VDUP(vector2, q, int, s, 8, 16, -10); + TEST_VDUP(vector2, q, int, s, 16, 8, -20); + TEST_VDUP(vector2, q, int, s, 32, 4, -30); + TEST_VDUP(vector2, q, int, s, 64, 2, 24); + TEST_VDUP(vector2, q, uint, u, 8, 16, 12); + TEST_VDUP(vector2, q, uint, 
u, 16, 8, 3); + TEST_VDUP(vector2, q, uint, u, 32, 4, 55); + TEST_VDUP(vector2, q, uint, u, 64, 2, 3); + + /* Choose shift amount arbitrarily */ + TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 4); + TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 3); + TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 1); + TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 32); + TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 2); + TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 10); + TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 30); + TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 3); + TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 5); + TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 3); + TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 20); + TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 16); + TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 3); + TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 12); + TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 23); + TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 53); + + dump_results_hex (TEST_MSG); + +#ifdef EXTRA_TESTS + EXTRA_TESTS(); +#endif +} diff --git a/ref_vsli_n.c b/ref_vsli_n.c index 2666af6..5f6ed23 100644 --- a/ref_vsli_n.c +++ b/ref_vsli_n.c @@ -23,35 +23,19 @@ THE SOFTWARE. */ -#ifdef __arm__ -#include <arm_neon.h> -#else -#error Target not supported -#endif - -#include "stm-arm-neon-ref.h" - -#ifndef INSN_NAME #define INSN_NAME vsli #define TEST_MSG "VSLI_N" -#endif -#define FNNAME1(NAME) void exec_ ## NAME ##_n (void) -#define FNNAME(NAME) FNNAME1(NAME) +/* Extra tests for functions requiring corner cases tests */ +void vsli_extra(void); +#define EXTRA_TESTS vsli_extra -FNNAME (INSN_NAME) -{ - /* vector_res = vmlx_n(vector, vector2, val), - then store the result. 
*/ -#define TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector2, T1, W, N), \ - V); \ - vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) +#include "ref_vsXi_n.c" -#define TEST_VMLX_N(INSN, Q, T1, T2, W, N, V) \ - TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V) +void vsli_extra(void) +{ + /* Test cases with maximum shift amount (this amount is different + * from vsri. */ /* With ARM RVCT, we need to declare variables before any executable statement */ @@ -82,23 +66,23 @@ FNNAME (INSN_NAME) TEST_VDUP(vector2, q, uint, u, 32, 4, 55); TEST_VDUP(vector2, q, uint, u, 64, 2, 3); - /* Choose shift amount arbitrarily */ - TEST_VMLX_N(INSN_NAME, , int, s, 8, 8, 4); - TEST_VMLX_N(INSN_NAME, , int, s, 16, 4, 3); - TEST_VMLX_N(INSN_NAME, , int, s, 32, 2, 1); - TEST_VMLX_N(INSN_NAME, , int, s, 64, 1, 32); - TEST_VMLX_N(INSN_NAME, , uint, u, 8, 8, 2); - TEST_VMLX_N(INSN_NAME, , uint, u, 16, 4, 10); - TEST_VMLX_N(INSN_NAME, , uint, u, 32, 2, 30); - TEST_VMLX_N(INSN_NAME, , uint, u, 64, 1, 3); - TEST_VMLX_N(INSN_NAME, q, int, s, 8, 16, 5); - TEST_VMLX_N(INSN_NAME, q, int, s, 16, 8, 3); - TEST_VMLX_N(INSN_NAME, q, int, s, 32, 4, 20); - TEST_VMLX_N(INSN_NAME, q, int, s, 64, 2, 16); - TEST_VMLX_N(INSN_NAME, q, uint, u, 8, 16, 3); - TEST_VMLX_N(INSN_NAME, q, uint, u, 16, 8, 12); - TEST_VMLX_N(INSN_NAME, q, uint, u, 32, 4, 23); - TEST_VMLX_N(INSN_NAME, q, uint, u, 64, 2, 53); - - dump_results_hex (TEST_MSG); + /* Use maximum allowed shift amount */ + TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 7); + TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 15); + TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 31); + TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 63); + TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 7); + TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 15); + TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 31); + TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 63); + TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 7); + 
TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 15); + TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 31); + TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 63); + TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 7); + TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 15); + TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 31); + TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 63); + + dump_results_hex2 (TEST_MSG, "max shift amount"); } diff --git a/ref_vsri_n.c b/ref_vsri_n.c index a4e916a..90ed127 100644 --- a/ref_vsri_n.c +++ b/ref_vsri_n.c @@ -26,4 +26,63 @@ THE SOFTWARE. #define INSN_NAME vsri #define TEST_MSG "VSRI_N" -#include "ref_vsli_n.c" +/* Extra tests for functions requiring corner cases tests */ +void vsri_extra(void); +#define EXTRA_TESTS vsri_extra + +#include "ref_vsXi_n.c" + +void vsri_extra(void) +{ + /* Test cases with maximum shift amount (this amount is different + * from vsli. */ + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer" */ + TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer); + + /* Fill input vector2 with arbitrary values */ + TEST_VDUP(vector2, , int, s, 8, 8, 2); + TEST_VDUP(vector2, , int, s, 16, 4, -4); + TEST_VDUP(vector2, , int, s, 32, 2, 3); + TEST_VDUP(vector2, , int, s, 64, 1, 100); + TEST_VDUP(vector2, , uint, u, 8, 8, 20); + TEST_VDUP(vector2, , uint, u, 16, 4, 30); + TEST_VDUP(vector2, , uint, u, 32, 2, 40); + TEST_VDUP(vector2, , uint, u, 64, 1, 2); + TEST_VDUP(vector2, q, int, s, 8, 16, -10); + TEST_VDUP(vector2, q, int, s, 16, 8, -20); + TEST_VDUP(vector2, q, int, s, 32, 4, -30); + TEST_VDUP(vector2, q, int, s, 64, 2, 24); + TEST_VDUP(vector2, q, uint, u, 8, 16, 12); + TEST_VDUP(vector2, q, uint, u, 16, 8, 3); + TEST_VDUP(vector2, q, uint, u, 32, 4, 55); + TEST_VDUP(vector2, q, uint, u, 64, 2, 3); + + /* Use maximum allowed shift amount 
*/ + TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 8); + TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 16); + TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 32); + TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 64); + TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 8); + TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 16); + TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 32); + TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 64); + TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 8); + TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 16); + TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 32); + TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 64); + TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 8); + TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 16); + TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 32); + TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 64); + + dump_results_hex2 (TEST_MSG, "max shift amount"); +} |