author     Martyn Capewell <martyn.capewell@arm.com>  2020-03-24 16:16:36 +0000
committer  Jacob Bramley <jacob.bramley@arm.com>      2020-06-19 09:41:25 +0100
commit     5f9b38004693feac1d20d4442ce9c78a182ed871 (patch)
tree       e3d65021d618d4270c4c05befbea19afe4405181 /test
parent     d154a44f2d7136589efc878d2165fe27f72848a4 (diff)
download   vixl-5f9b38004693feac1d20d4442ce9c78a182ed871.tar.gz
[sve] Implement ContiguousNonFaultLoad
Implement ldnf1b/h/w/d/sb/sh/sw instructions for immediate offsets.

Change-Id: I0801e8125c4cbf4cf2fdeb250a3a543e4e14af60
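As background, a non-faulting load never traps on an inaccessible element; instead it clears the corresponding lanes of the first-fault register (FFR), and the matching destination lanes must be treated as undefined (the architecture even allows lanes to be abandoned arbitrarily, as the comment in Ldff1Helper below notes). A minimal sketch of the check pattern the new test uses, built only from MacroAssembler calls that appear in the diff (`__` is the test's shorthand for the MacroAssembler; x0 is assumed to point just below an inaccessible page):

    __ Setffr();                        // Start with every FFR lane set.
    __ Ptrue(p0.VnB());                 // Activate all byte lanes.
    // Non-faulting load: stops, without trapping, at the first faulting byte.
    __ Ldnf1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0));
    __ Rdffr(p1.VnB());                 // p1 now marks the lanes that were loaded.
    // Lanes not set in p1 hold undefined data; the test masks them (with Sel)
    // before comparing against a normal Ld1b reference load.

The test below repeats this sequence for larger element sizes, different offsets, and a sign-extending load.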
Diffstat (limited to 'test')
-rw-r--r--  test/aarch64/test-assembler-sve-aarch64.cc  109
-rw-r--r--  test/aarch64/test-disasm-sve-aarch64.cc       80
2 files changed, 170 insertions(+), 19 deletions(-)
diff --git a/test/aarch64/test-assembler-sve-aarch64.cc b/test/aarch64/test-assembler-sve-aarch64.cc
index 6fb1092e..34f37e29 100644
--- a/test/aarch64/test-assembler-sve-aarch64.cc
+++ b/test/aarch64/test-assembler-sve-aarch64.cc
@@ -8868,7 +8868,7 @@ static void Ldff1Helper(Test* config,
size_t offset_modifier = 0;
- // The highest adddress at which a load stopped. Every FF load should fault at
+ // The highest address at which a load stopped. Every FF load should fault at
// `data + page_size`, so this value should not exceed that value. However,
// the architecture allows fault-tolerant loads to fault arbitrarily, so the
// real value may be lower.
@@ -9371,6 +9371,113 @@ TEST_SVE(sve_ldff1_scalar_plus_vector) {
munmap(reinterpret_cast<void*>(data), page_size * 2);
}
+TEST_SVE(sve_ldnf1) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kFP);
+ START();
+
+ size_t page_size = sysconf(_SC_PAGE_SIZE);
+ VIXL_ASSERT(page_size > static_cast<size_t>(config->sve_vl_in_bytes()));
+
+ // Allocate two pages, fill them with data, then mprotect the second one to
+ // make it inaccessible.
+ uintptr_t data = reinterpret_cast<uintptr_t>(mmap(NULL,
+ page_size * 2,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1,
+ 0));
+
+ // Fill the pages with arbitrary data.
+ for (size_t i = 0; i < page_size; i++) {
+ // Reverse bits so we get a mixture of positive and negative values.
+ uint8_t byte = ReverseBits(static_cast<uint8_t>(i));
+ memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
+ }
+
+ mprotect(reinterpret_cast<void*>(data + page_size), page_size, PROT_NONE);
+
+ __ Setffr();
+ __ Ptrue(p0.VnB());
+ __ Dup(z10.VnB(), 0);
+
+ // Set x0 to an address pointing to the last eight unprotected bytes.
+ __ Mov(x0, data + page_size - (kQRegSizeInBytes / kBRegSizeInBytes) / 2);
+
+ // Perform a non-faulting load of a vector of bytes from x0. At most eight
+ // bytes can be loaded; the rest lie in the protected page.
+ __ Ldnf1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0));
+ __ Rdffr(p1.VnB());
+ __ Setffr();
+
+ // Create references using the FFR value in p1 to zero the undefined lanes.
+ __ Sel(z0.VnB(), p1, z0.VnB(), z10.VnB());
+ __ Ld1b(z20.VnB(), p1.Zeroing(), SVEMemOperand(x0));
+
+ // Repeat for larger elements and different addresses, giving different FFR
+ // results.
+ __ Add(x1, x0, 1);
+ __ Ldnf1h(z1.VnH(), p0.Zeroing(), SVEMemOperand(x1));
+ __ Rdffr(p1.VnB());
+ __ Setffr();
+ __ Sel(z1.VnH(), p1, z1.VnH(), z10.VnH());
+ __ Ld1h(z21.VnH(), p1.Zeroing(), SVEMemOperand(x1));
+
+ __ Add(x1, x0, 2);
+ __ Ldnf1w(z2.VnS(), p0.Zeroing(), SVEMemOperand(x1));
+ __ Rdffr(p1.VnB());
+ __ Setffr();
+ __ Sel(z2.VnS(), p1, z2.VnS(), z10.VnS());
+ __ Ld1w(z22.VnS(), p1.Zeroing(), SVEMemOperand(x1));
+
+ __ Sub(x1, x0, 1);
+ __ Ldnf1d(z3.VnD(), p0.Zeroing(), SVEMemOperand(x1));
+ __ Rdffr(p1.VnB());
+ __ Setffr();
+ __ Sel(z3.VnD(), p1, z3.VnD(), z10.VnD());
+ __ Ld1d(z23.VnD(), p1.Zeroing(), SVEMemOperand(x1));
+
+ // Load from the previous VL-sized area of memory. All of this should be in
+ // the accessible page.
+ __ Ldnf1b(z4.VnB(), p0.Zeroing(), SVEMemOperand(x0, -1, SVE_MUL_VL));
+ __ Rdffr(p1.VnB());
+ __ Setffr();
+ __ Sel(z4.VnB(), p1, z4.VnB(), z10.VnB());
+ __ Ld1b(z24.VnB(), p1.Zeroing(), SVEMemOperand(x0, -1, SVE_MUL_VL));
+
+ // Repeat partial load for larger element size.
+ __ Mov(x0, data + page_size - (kQRegSizeInBytes / kSRegSizeInBytes) / 2);
+ __ Ldnf1b(z5.VnS(), p0.Zeroing(), SVEMemOperand(x0));
+ __ Rdffr(p1.VnB());
+ __ Setffr();
+ __ Sel(z5.VnS(), p1, z5.VnS(), z10.VnS());
+ __ Ld1b(z25.VnS(), p1.Zeroing(), SVEMemOperand(x0));
+
+ // Repeat for sign extension.
+ __ Mov(x0, data + page_size - (kQRegSizeInBytes / kHRegSizeInBytes) / 2);
+ __ Ldnf1sb(z6.VnH(), p0.Zeroing(), SVEMemOperand(x0));
+ __ Rdffr(p1.VnB());
+ __ Setffr();
+ __ Sel(z6.VnH(), p1, z6.VnH(), z10.VnH());
+ __ Ld1sb(z26.VnH(), p1.Zeroing(), SVEMemOperand(x0));
+
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+ ASSERT_EQUAL_SVE(z20, z0);
+ ASSERT_EQUAL_SVE(z21, z1);
+ ASSERT_EQUAL_SVE(z22, z2);
+ ASSERT_EQUAL_SVE(z23, z3);
+ ASSERT_EQUAL_SVE(z24, z4);
+ ASSERT_EQUAL_SVE(z25, z5);
+ ASSERT_EQUAL_SVE(z26, z6);
+ }
+
+ munmap(reinterpret_cast<void*>(data), page_size * 2);
+}
+
// Test gather loads by comparing them with the result of a set of equivalent
// scalar loads.
static void GatherLoadScalarPlusVectorHelper(Test* config,
diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc
index 3a6ab7d5..bb0ef81d 100644
--- a/test/aarch64/test-disasm-sve-aarch64.cc
+++ b/test/aarch64/test-disasm-sve-aarch64.cc
@@ -4481,24 +4481,68 @@ TEST(sve_mem_contiguous_load) {
"add x16, x13, #0x3 (3)\n"
"ldnt1d { z5.d }, p3/z, [x16]");
-#if 0
- COMPARE_PREFIX(ldnf1b(z1.VnH(), p0.Zeroing(), x25, int imm4), "ldnf1b { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1b(z0.VnS(), p0.Zeroing(), x2, int imm4), "ldnf1b { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1b(z31.VnD(), p6.Zeroing(), x0, int imm4), "ldnf1b { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1b(z25.VnB(), p1.Zeroing(), x5, int imm4), "ldnf1b { <Zt>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1d(z25.VnD(), p0.Zeroing(), x11, int imm4), "ldnf1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1h(z22.VnH(), p4.Zeroing(), x7, int imm4), "ldnf1h { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1h(z7.VnS(), p2.Zeroing(), x1, int imm4), "ldnf1h { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1h(z5.VnD(), p3.Zeroing(), x29, int imm4), "ldnf1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1sb(z12.VnH(), p5.Zeroing(), x27, int imm4), "ldnf1sb { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1sb(z10.VnS(), p2.Zeroing(), x13, int imm4), "ldnf1sb { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1sb(z25.VnD(), p6.Zeroing(), x26, int imm4), "ldnf1sb { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1sh(z3.VnS(), p5.Zeroing(), x1, int imm4), "ldnf1sh { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1sh(z8.VnD(), p6.Zeroing(), x13, int imm4), "ldnf1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1sw(z5.VnD(), p6.Zeroing(), x2, int imm4), "ldnf1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1w(z11.VnS(), p3.Zeroing(), x26, int imm4), "ldnf1w { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
- COMPARE_PREFIX(ldnf1w(z10.VnD(), p6.Zeroing(), x12, int imm4), "ldnf1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]");
-#endif
+ COMPARE_PREFIX(ldnf1b(z1.VnH(),
+ p0.Zeroing(),
+ SVEMemOperand(x25, -8, SVE_MUL_VL)),
+ "ldnf1b { z1.h }, p0/z, [x25, #-8, mul vl]");
+ COMPARE_PREFIX(ldnf1b(z0.VnS(),
+ p0.Zeroing(),
+ SVEMemOperand(x2, 7, SVE_MUL_VL)),
+ "ldnf1b { z0.s }, p0/z, [x2, #7, mul vl]");
+ COMPARE_PREFIX(ldnf1b(z31.VnD(),
+ p6.Zeroing(),
+ SVEMemOperand(x0, -7, SVE_MUL_VL)),
+ "ldnf1b { z31.d }, p6/z, [x0, #-7, mul vl]");
+ COMPARE_PREFIX(ldnf1b(z25.VnB(),
+ p1.Zeroing(),
+ SVEMemOperand(x5, 6, SVE_MUL_VL)),
+ "ldnf1b { z25.b }, p1/z, [x5, #6, mul vl]");
+ COMPARE_PREFIX(ldnf1d(z25.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x11, -6, SVE_MUL_VL)),
+ "ldnf1d { z25.d }, p0/z, [x11, #-6, mul vl]");
+ COMPARE_PREFIX(ldnf1h(z22.VnH(),
+ p4.Zeroing(),
+ SVEMemOperand(x7, 5, SVE_MUL_VL)),
+ "ldnf1h { z22.h }, p4/z, [x7, #5, mul vl]");
+ COMPARE_PREFIX(ldnf1h(z7.VnS(),
+ p2.Zeroing(),
+ SVEMemOperand(x1, -5, SVE_MUL_VL)),
+ "ldnf1h { z7.s }, p2/z, [x1, #-5, mul vl]");
+ COMPARE_PREFIX(ldnf1h(z5.VnD(),
+ p3.Zeroing(),
+ SVEMemOperand(x29, 4, SVE_MUL_VL)),
+ "ldnf1h { z5.d }, p3/z, [x29, #4, mul vl]");
+ COMPARE_PREFIX(ldnf1sb(z12.VnH(),
+ p5.Zeroing(),
+ SVEMemOperand(x27, -4, SVE_MUL_VL)),
+ "ldnf1sb { z12.h }, p5/z, [x27, #-4, mul vl]");
+ COMPARE_PREFIX(ldnf1sb(z10.VnS(),
+ p2.Zeroing(),
+ SVEMemOperand(x13, 3, SVE_MUL_VL)),
+ "ldnf1sb { z10.s }, p2/z, [x13, #3, mul vl]");
+ COMPARE_PREFIX(ldnf1sb(z25.VnD(),
+ p6.Zeroing(),
+ SVEMemOperand(x26, -3, SVE_MUL_VL)),
+ "ldnf1sb { z25.d }, p6/z, [x26, #-3, mul vl]");
+ COMPARE_PREFIX(ldnf1sh(z3.VnS(),
+ p5.Zeroing(),
+ SVEMemOperand(x1, 2, SVE_MUL_VL)),
+ "ldnf1sh { z3.s }, p5/z, [x1, #2, mul vl]");
+ COMPARE_PREFIX(ldnf1sh(z8.VnD(),
+ p6.Zeroing(),
+ SVEMemOperand(x13, -2, SVE_MUL_VL)),
+ "ldnf1sh { z8.d }, p6/z, [x13, #-2, mul vl]");
+ COMPARE_PREFIX(ldnf1sw(z5.VnD(),
+ p6.Zeroing(),
+ SVEMemOperand(x2, 1, SVE_MUL_VL)),
+ "ldnf1sw { z5.d }, p6/z, [x2, #1, mul vl]");
+ COMPARE_PREFIX(ldnf1w(z11.VnS(),
+ p3.Zeroing(),
+ SVEMemOperand(sp, -1, SVE_MUL_VL)),
+ "ldnf1w { z11.s }, p3/z, [sp, #-1, mul vl]");
+ COMPARE_PREFIX(ldnf1w(z10.VnD(), p6.Zeroing(), SVEMemOperand(x12)),
+ "ldnf1w { z10.d }, p6/z, [x12]");
CLEANUP();
}