diff options
author | Martyn Capewell <martyn.capewell@arm.com> | 2020-03-24 16:16:36 +0000 |
---|---|---|
committer | Jacob Bramley <jacob.bramley@arm.com> | 2020-06-19 09:41:25 +0100 |
commit | 5f9b38004693feac1d20d4442ce9c78a182ed871 (patch) |
tree | e3d65021d618d4270c4c05befbea19afe4405181 /test | |
parent | d154a44f2d7136589efc878d2165fe27f72848a4 (diff) |
download | vixl-5f9b38004693feac1d20d4442ce9c78a182ed871.tar.gz |
[sve] Implement ContiguousNonFaultLoad
Implement ldnf1b/h/w/d/sb/sh/sw instructions for immediate offsets.
Change-Id: I0801e8125c4cbf4cf2fdeb250a3a543e4e14af60
Diffstat (limited to 'test')
-rw-r--r-- | test/aarch64/test-assembler-sve-aarch64.cc | 109
-rw-r--r-- | test/aarch64/test-disasm-sve-aarch64.cc | 80 |
2 files changed, 170 insertions, 19 deletions
diff --git a/test/aarch64/test-assembler-sve-aarch64.cc b/test/aarch64/test-assembler-sve-aarch64.cc index 6fb1092e..34f37e29 100644 --- a/test/aarch64/test-assembler-sve-aarch64.cc +++ b/test/aarch64/test-assembler-sve-aarch64.cc @@ -8868,7 +8868,7 @@ static void Ldff1Helper(Test* config, size_t offset_modifier = 0; - // The highest adddress at which a load stopped. Every FF load should fault at + // The highest address at which a load stopped. Every FF load should fault at // `data + page_size`, so this value should not exceed that value. However, // the architecture allows fault-tolerant loads to fault arbitrarily, so the // real value may be lower. @@ -9371,6 +9371,113 @@ TEST_SVE(sve_ldff1_scalar_plus_vector) { munmap(reinterpret_cast<void*>(data), page_size * 2); } +TEST_SVE(sve_ldnf1) { + SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, + CPUFeatures::kNEON, + CPUFeatures::kFP); + START(); + + size_t page_size = sysconf(_SC_PAGE_SIZE); + VIXL_ASSERT(page_size > static_cast<size_t>(config->sve_vl_in_bytes())); + + // Allocate two pages, fill them with data, then mprotect the second one to + // make it inaccessible. + uintptr_t data = reinterpret_cast<uintptr_t>(mmap(NULL, + page_size * 2, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, + 0)); + + // Fill the pages with arbitrary data. + for (size_t i = 0; i < page_size; i++) { + // Reverse bits so we get a mixture of positive and negative values. + uint8_t byte = ReverseBits(static_cast<uint8_t>(i)); + memcpy(reinterpret_cast<void*>(data + i), &byte, 1); + } + + mprotect(reinterpret_cast<void*>(data + page_size), page_size, PROT_NONE); + + __ Setffr(); + __ Ptrue(p0.VnB()); + __ Dup(z10.VnB(), 0); + + // Move an address that points to the last unprotected eight bytes. + __ Mov(x0, data + page_size - (kQRegSizeInBytes / kBRegSizeInBytes) / 2); + + // Load, non-faulting, a vector of bytes from x0. At most, eight bytes will be + // loaded, the rest being in a protected page. 
+ __ Ldnf1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0)); + __ Rdffr(p1.VnB()); + __ Setffr(); + + // Create references using the FFR value in p1 to zero the undefined lanes. + __ Sel(z0.VnB(), p1, z0.VnB(), z10.VnB()); + __ Ld1b(z20.VnB(), p1.Zeroing(), SVEMemOperand(x0)); + + // Repeat for larger elements and different addresses, giving different FFR + // results. + __ Add(x1, x0, 1); + __ Ldnf1h(z1.VnH(), p0.Zeroing(), SVEMemOperand(x1)); + __ Rdffr(p1.VnB()); + __ Setffr(); + __ Sel(z1.VnH(), p1, z1.VnH(), z10.VnH()); + __ Ld1h(z21.VnH(), p1.Zeroing(), SVEMemOperand(x1)); + + __ Add(x1, x0, 2); + __ Ldnf1w(z2.VnS(), p0.Zeroing(), SVEMemOperand(x1)); + __ Rdffr(p1.VnB()); + __ Setffr(); + __ Sel(z2.VnS(), p1, z2.VnS(), z10.VnS()); + __ Ld1w(z22.VnS(), p1.Zeroing(), SVEMemOperand(x1)); + + __ Sub(x1, x0, 1); + __ Ldnf1d(z3.VnD(), p0.Zeroing(), SVEMemOperand(x1)); + __ Rdffr(p1.VnB()); + __ Setffr(); + __ Sel(z3.VnD(), p1, z3.VnD(), z10.VnD()); + __ Ld1d(z23.VnD(), p1.Zeroing(), SVEMemOperand(x1)); + + // Load from previous VL-sized area of memory. All of this should be in the + // accessible page. + __ Ldnf1b(z4.VnB(), p0.Zeroing(), SVEMemOperand(x0, -1, SVE_MUL_VL)); + __ Rdffr(p1.VnB()); + __ Setffr(); + __ Sel(z4.VnB(), p1, z4.VnB(), z10.VnB()); + __ Ld1b(z24.VnB(), p1.Zeroing(), SVEMemOperand(x0, -1, SVE_MUL_VL)); + + // Repeat partial load for larger element size. + __ Mov(x0, data + page_size - (kQRegSizeInBytes / kSRegSizeInBytes) / 2); + __ Ldnf1b(z5.VnS(), p0.Zeroing(), SVEMemOperand(x0)); + __ Rdffr(p1.VnB()); + __ Setffr(); + __ Sel(z5.VnS(), p1, z5.VnS(), z10.VnS()); + __ Ld1b(z25.VnS(), p1.Zeroing(), SVEMemOperand(x0)); + + // Repeat for sign extension. 
+ __ Mov(x0, data + page_size - (kQRegSizeInBytes / kHRegSizeInBytes) / 2); + __ Ldnf1sb(z6.VnH(), p0.Zeroing(), SVEMemOperand(x0)); + __ Rdffr(p1.VnB()); + __ Setffr(); + __ Sel(z6.VnH(), p1, z6.VnH(), z10.VnH()); + __ Ld1sb(z26.VnH(), p1.Zeroing(), SVEMemOperand(x0)); + + END(); + + if (CAN_RUN()) { + RUN(); + ASSERT_EQUAL_SVE(z20, z0); + ASSERT_EQUAL_SVE(z21, z1); + ASSERT_EQUAL_SVE(z22, z2); + ASSERT_EQUAL_SVE(z23, z3); + ASSERT_EQUAL_SVE(z24, z4); + ASSERT_EQUAL_SVE(z25, z5); + ASSERT_EQUAL_SVE(z26, z6); + } + + munmap(reinterpret_cast<void*>(data), page_size * 2); +} + // Test gather loads by comparing them with the result of a set of equivalent // scalar loads. static void GatherLoadScalarPlusVectorHelper(Test* config, diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc index 3a6ab7d5..bb0ef81d 100644 --- a/test/aarch64/test-disasm-sve-aarch64.cc +++ b/test/aarch64/test-disasm-sve-aarch64.cc @@ -4481,24 +4481,68 @@ TEST(sve_mem_contiguous_load) { "add x16, x13, #0x3 (3)\n" "ldnt1d { z5.d }, p3/z, [x16]"); -#if 0 - COMPARE_PREFIX(ldnf1b(z1.VnH(), p0.Zeroing(), x25, int imm4), "ldnf1b { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1b(z0.VnS(), p0.Zeroing(), x2, int imm4), "ldnf1b { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1b(z31.VnD(), p6.Zeroing(), x0, int imm4), "ldnf1b { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1b(z25.VnB(), p1.Zeroing(), x5, int imm4), "ldnf1b { <Zt>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1d(z25.VnD(), p0.Zeroing(), x11, int imm4), "ldnf1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1h(z22.VnH(), p4.Zeroing(), x7, int imm4), "ldnf1h { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1h(z7.VnS(), p2.Zeroing(), x1, int imm4), "ldnf1h { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1h(z5.VnD(), p3.Zeroing(), x29, int 
imm4), "ldnf1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1sb(z12.VnH(), p5.Zeroing(), x27, int imm4), "ldnf1sb { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1sb(z10.VnS(), p2.Zeroing(), x13, int imm4), "ldnf1sb { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1sb(z25.VnD(), p6.Zeroing(), x26, int imm4), "ldnf1sb { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1sh(z3.VnS(), p5.Zeroing(), x1, int imm4), "ldnf1sh { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1sh(z8.VnD(), p6.Zeroing(), x13, int imm4), "ldnf1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1sw(z5.VnD(), p6.Zeroing(), x2, int imm4), "ldnf1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1w(z11.VnS(), p3.Zeroing(), x26, int imm4), "ldnf1w { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); - COMPARE_PREFIX(ldnf1w(z10.VnD(), p6.Zeroing(), x12, int imm4), "ldnf1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]"); -#endif + COMPARE_PREFIX(ldnf1b(z1.VnH(), + p0.Zeroing(), + SVEMemOperand(x25, -8, SVE_MUL_VL)), + "ldnf1b { z1.h }, p0/z, [x25, #-8, mul vl]"); + COMPARE_PREFIX(ldnf1b(z0.VnS(), + p0.Zeroing(), + SVEMemOperand(x2, 7, SVE_MUL_VL)), + "ldnf1b { z0.s }, p0/z, [x2, #7, mul vl]"); + COMPARE_PREFIX(ldnf1b(z31.VnD(), + p6.Zeroing(), + SVEMemOperand(x0, -7, SVE_MUL_VL)), + "ldnf1b { z31.d }, p6/z, [x0, #-7, mul vl]"); + COMPARE_PREFIX(ldnf1b(z25.VnB(), + p1.Zeroing(), + SVEMemOperand(x5, 6, SVE_MUL_VL)), + "ldnf1b { z25.b }, p1/z, [x5, #6, mul vl]"); + COMPARE_PREFIX(ldnf1d(z25.VnD(), + p0.Zeroing(), + SVEMemOperand(x11, -6, SVE_MUL_VL)), + "ldnf1d { z25.d }, p0/z, [x11, #-6, mul vl]"); + COMPARE_PREFIX(ldnf1h(z22.VnH(), + p4.Zeroing(), + SVEMemOperand(x7, 5, SVE_MUL_VL)), + "ldnf1h { z22.h }, p4/z, [x7, #5, mul vl]"); + COMPARE_PREFIX(ldnf1h(z7.VnS(), + p2.Zeroing(), + SVEMemOperand(x1, -5, SVE_MUL_VL)), + "ldnf1h { z7.s }, 
p2/z, [x1, #-5, mul vl]"); + COMPARE_PREFIX(ldnf1h(z5.VnD(), + p3.Zeroing(), + SVEMemOperand(x29, 4, SVE_MUL_VL)), + "ldnf1h { z5.d }, p3/z, [x29, #4, mul vl]"); + COMPARE_PREFIX(ldnf1sb(z12.VnH(), + p5.Zeroing(), + SVEMemOperand(x27, -4, SVE_MUL_VL)), + "ldnf1sb { z12.h }, p5/z, [x27, #-4, mul vl]"); + COMPARE_PREFIX(ldnf1sb(z10.VnS(), + p2.Zeroing(), + SVEMemOperand(x13, 3, SVE_MUL_VL)), + "ldnf1sb { z10.s }, p2/z, [x13, #3, mul vl]"); + COMPARE_PREFIX(ldnf1sb(z25.VnD(), + p6.Zeroing(), + SVEMemOperand(x26, -3, SVE_MUL_VL)), + "ldnf1sb { z25.d }, p6/z, [x26, #-3, mul vl]"); + COMPARE_PREFIX(ldnf1sh(z3.VnS(), + p5.Zeroing(), + SVEMemOperand(x1, 2, SVE_MUL_VL)), + "ldnf1sh { z3.s }, p5/z, [x1, #2, mul vl]"); + COMPARE_PREFIX(ldnf1sh(z8.VnD(), + p6.Zeroing(), + SVEMemOperand(x13, -2, SVE_MUL_VL)), + "ldnf1sh { z8.d }, p6/z, [x13, #-2, mul vl]"); + COMPARE_PREFIX(ldnf1sw(z5.VnD(), + p6.Zeroing(), + SVEMemOperand(x2, 1, SVE_MUL_VL)), + "ldnf1sw { z5.d }, p6/z, [x2, #1, mul vl]"); + COMPARE_PREFIX(ldnf1w(z11.VnS(), + p3.Zeroing(), + SVEMemOperand(sp, -1, SVE_MUL_VL)), + "ldnf1w { z11.s }, p3/z, [sp, #-1, mul vl]"); + COMPARE_PREFIX(ldnf1w(z10.VnD(), p6.Zeroing(), SVEMemOperand(x12)), + "ldnf1w { z10.d }, p6/z, [x12]"); CLEANUP(); } |