diff options
author | TatWai Chong <tatwai.chong@arm.com> | 2020-06-01 20:54:06 -0700 |
---|---|---|
committer | TatWai Chong <tatwai.chong@arm.com> | 2020-06-01 22:47:12 -0700 |
commit | 1af34f12ac11ab47651eee548ac4e570cef39c05 (patch) | |
tree | 0773d9f608fede7e4033e3327707155efabc0292 /test | |
parent | 6537a9a73eeaff4b2534da52d4151462030a234c (diff) | |
download | vixl-1af34f12ac11ab47651eee548ac4e570cef39c05.tar.gz |
[sve] Implement gather load first-fault data to 64-bit vector (vector index).
Include ldff1b, ldff1h, ldff1w, ldff1d, ldff1sb, ldff1sh and ldff1sw.
Change-Id: I256831727dc9d83b6d13eb5b8d9c15b0192cc277
Diffstat (limited to 'test')
-rw-r--r-- | test/aarch64/test-assembler-sve-aarch64.cc | 376 | ||||
-rw-r--r-- | test/aarch64/test-disasm-sve-aarch64.cc | 124 |
2 files changed, 413 insertions, 87 deletions
diff --git a/test/aarch64/test-assembler-sve-aarch64.cc b/test/aarch64/test-assembler-sve-aarch64.cc index 5e93752d..ede7b7a1 100644 --- a/test/aarch64/test-assembler-sve-aarch64.cc +++ b/test/aarch64/test-assembler-sve-aarch64.cc @@ -32,6 +32,7 @@ #include <cstdio> #include <cstdlib> #include <cstring> +#include <functional> #include "test-runner.h" #include "test-utils.h" @@ -8832,14 +8833,14 @@ typedef void (MacroAssembler::*Ld1Macro)(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr); -template <typename T> static void Ldff1Helper(Test* config, uintptr_t data, unsigned msize_in_bits, unsigned esize_in_bits, + CPURegister::RegisterType base_type, Ld1Macro ldff1, Ld1Macro ld1, - T mod, + SVEOffsetModifier mod, bool scale = false) { SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE); START(); @@ -8889,16 +8890,36 @@ static void Ldff1Helper(Test* config, __ Mov(x20, data); __ Mov(x21, offset); - if (std::is_same<T, vixl::aarch64::Shift>::value) { - VIXL_ASSERT(scale == false); - // Scalar plus scalar mode. - (masm.*ldff1)(z0.WithLaneSize(esize_in_bits), - all.Zeroing(), - SVEMemOperand(x20, x21, mod, msize_in_bytes_log2)); + if (base_type == CPURegister::kRegister) { + // Scalar-plus-scalar mode. + if ((mod == SVE_LSL) || (mod == NO_SVE_OFFSET_MODIFIER)) { + (masm.*ldff1)(z0.WithLaneSize(esize_in_bits), + all.Zeroing(), + SVEMemOperand(x20, x21, mod, msize_in_bytes_log2)); + } else { + VIXL_UNIMPLEMENTED(); + } + + } else if (base_type == CPURegister::kZRegister) { + int offs_size; + bool offs_is_unsigned; + if ((mod == SVE_SXTW) || (mod == SVE_UXTW)) { + // Scalar-plus-vector mode with 32-bit optional unpacked or upacked, and + // unscaled or scaled offset. + if (scale == true) { + // Gather first-fault bytes load doesn't support scaled offset. + VIXL_ASSERT(msize_in_bits != kBRegSize); + } + offs_is_unsigned = (mod == SVE_UXTW) ? true : false; + offs_size = kSRegSize; + + } else { + // Scalar-plus-vector mode with 64-bit unscaled or scaled offset. 
+ VIXL_ASSERT((mod == SVE_LSL) || (mod == NO_SVE_OFFSET_MODIFIER)); + offs_is_unsigned = false; + offs_size = kDRegSize; + } - } else if (std::is_same<T, vixl::aarch64::Extend>::value) { - VIXL_ASSERT((static_cast<int>(mod) == UXTW) || - (static_cast<int>(mod) == SXTW)); // For generating the pattern of "base address + index << shift". // In case of unscaled-offset operation, use `msize_in_bytes` be an offset // of each decreasing memory accesses. otherwise, decreases the indexes by 1 @@ -8906,12 +8927,10 @@ static void Ldff1Helper(Test* config, int shift = (scale == true) ? msize_in_bytes_log2 : 0; int index_offset = msize_in_bytes >> shift; VIXL_ASSERT(index_offset > 0); - // TODO `offs_size` can be different once 64-bit offset is supported. - int offs_size = kSRegSize; uint64_t index = 0; uint64_t base_address = 0; - if (static_cast<int>(mod) == UXTW) { + if (offs_is_unsigned == true) { // Base address. base_address = data; // Maximum unsigned positive index. @@ -8942,6 +8961,8 @@ static void Ldff1Helper(Test* config, ldff1)(z0.WithLaneSize(esize_in_bits), all.Zeroing(), SVEMemOperand(x19, z17.WithLaneSize(esize_in_bits), mod, shift)); + } else { + VIXL_UNIMPLEMENTED(); } __ Rdffrs(p0.VnB(), all.Zeroing()); @@ -9020,44 +9041,299 @@ TEST_SVE(sve_ldff1_scalar_plus_scalar) { memcpy(reinterpret_cast<void*>(data + i), &byte, 1); } + auto ldff1_unscaled_offset_helper = std::bind(&Ldff1Helper, + config, + data, + std::placeholders::_1, + std::placeholders::_2, + CPURegister::kRegister, + std::placeholders::_3, + std::placeholders::_4, + NO_SVE_OFFSET_MODIFIER, + false); + Ld1Macro ldff1b = &MacroAssembler::Ldff1b; Ld1Macro ld1b = &MacroAssembler::Ld1b; - Ldff1Helper(config, data, kBRegSize, kBRegSize, ldff1b, ld1b, LSL); - Ldff1Helper(config, data, kBRegSize, kHRegSize, ldff1b, ld1b, LSL); - Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1b, ld1b, LSL); - Ldff1Helper(config, data, kBRegSize, kDRegSize, ldff1b, ld1b, LSL); + ldff1_unscaled_offset_helper(kBRegSize, 
kBRegSize, ldff1b, ld1b); + ldff1_unscaled_offset_helper(kBRegSize, kHRegSize, ldff1b, ld1b); + ldff1_unscaled_offset_helper(kBRegSize, kSRegSize, ldff1b, ld1b); + ldff1_unscaled_offset_helper(kBRegSize, kDRegSize, ldff1b, ld1b); + + Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb; + Ld1Macro ld1sb = &MacroAssembler::Ld1sb; + ldff1_unscaled_offset_helper(kBRegSize, kHRegSize, ldff1sb, ld1sb); + ldff1_unscaled_offset_helper(kBRegSize, kSRegSize, ldff1sb, ld1sb); + ldff1_unscaled_offset_helper(kBRegSize, kDRegSize, ldff1sb, ld1sb); + + auto ldff1_scaled_offset_helper = std::bind(&Ldff1Helper, + config, + data, + std::placeholders::_1, + std::placeholders::_2, + CPURegister::kRegister, + std::placeholders::_3, + std::placeholders::_4, + SVE_LSL, + true); Ld1Macro ldff1h = &MacroAssembler::Ldff1h; Ld1Macro ld1h = &MacroAssembler::Ld1h; - Ldff1Helper(config, data, kHRegSize, kHRegSize, ldff1h, ld1h, LSL); - Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1h, ld1h, LSL); - Ldff1Helper(config, data, kHRegSize, kDRegSize, ldff1h, ld1h, LSL); + ldff1_scaled_offset_helper(kHRegSize, kHRegSize, ldff1h, ld1h); + ldff1_scaled_offset_helper(kHRegSize, kSRegSize, ldff1h, ld1h); + ldff1_scaled_offset_helper(kHRegSize, kDRegSize, ldff1h, ld1h); Ld1Macro ldff1w = &MacroAssembler::Ldff1w; Ld1Macro ld1w = &MacroAssembler::Ld1w; - Ldff1Helper(config, data, kSRegSize, kSRegSize, ldff1w, ld1w, LSL); - Ldff1Helper(config, data, kSRegSize, kDRegSize, ldff1w, ld1w, LSL); + ldff1_scaled_offset_helper(kSRegSize, kSRegSize, ldff1w, ld1w); + ldff1_scaled_offset_helper(kSRegSize, kDRegSize, ldff1w, ld1w); Ld1Macro ldff1d = &MacroAssembler::Ldff1d; Ld1Macro ld1d = &MacroAssembler::Ld1d; - Ldff1Helper(config, data, kDRegSize, kDRegSize, ldff1d, ld1d, LSL); + ldff1_scaled_offset_helper(kDRegSize, kDRegSize, ldff1d, ld1d); + + Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh; + Ld1Macro ld1sh = &MacroAssembler::Ld1sh; + ldff1_scaled_offset_helper(kHRegSize, kSRegSize, ldff1sh, ld1sh); + 
ldff1_scaled_offset_helper(kHRegSize, kDRegSize, ldff1sh, ld1sh); + + Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw; + Ld1Macro ld1sw = &MacroAssembler::Ld1sw; + ldff1_scaled_offset_helper(kSRegSize, kDRegSize, ldff1sw, ld1sw); + + munmap(reinterpret_cast<void*>(data), page_size * 2); +} + +static void sve_ldff1_scalar_plus_vector_32_scaled_offset(Test* config, + uintptr_t data) { + auto ldff1_32_scaled_offset_helper = std::bind(&Ldff1Helper, + config, + data, + std::placeholders::_1, + kSRegSize, + CPURegister::kZRegister, + std::placeholders::_2, + std::placeholders::_3, + std::placeholders::_4, + true); + Ld1Macro ldff1h = &MacroAssembler::Ldff1h; + Ld1Macro ld1h = &MacroAssembler::Ld1h; + ldff1_32_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW); + ldff1_32_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW); + + Ld1Macro ldff1w = &MacroAssembler::Ldff1w; + Ld1Macro ld1w = &MacroAssembler::Ld1w; + ldff1_32_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW); + ldff1_32_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW); + + Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh; + Ld1Macro ld1sh = &MacroAssembler::Ld1sh; + ldff1_32_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW); + ldff1_32_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW); +} + +static void sve_ldff1_scalar_plus_vector_32_unscaled_offset(Test* config, + uintptr_t data) { + auto ldff1_32_unscaled_offset_helper = std::bind(&Ldff1Helper, + config, + data, + std::placeholders::_1, + kSRegSize, + CPURegister::kZRegister, + std::placeholders::_2, + std::placeholders::_3, + std::placeholders::_4, + false); + + Ld1Macro ldff1b = &MacroAssembler::Ldff1b; + Ld1Macro ld1b = &MacroAssembler::Ld1b; + ldff1_32_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_UXTW); + ldff1_32_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_SXTW); + + Ld1Macro ldff1h = &MacroAssembler::Ldff1h; + Ld1Macro ld1h = &MacroAssembler::Ld1h; + ldff1_32_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, 
SVE_UXTW); + ldff1_32_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW); + + Ld1Macro ldff1w = &MacroAssembler::Ldff1w; + Ld1Macro ld1w = &MacroAssembler::Ld1w; + ldff1_32_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW); + ldff1_32_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW); Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb; Ld1Macro ld1sb = &MacroAssembler::Ld1sb; - Ldff1Helper(config, data, kBRegSize, kHRegSize, ldff1sb, ld1sb, LSL); - Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1sb, ld1sb, LSL); - Ldff1Helper(config, data, kBRegSize, kDRegSize, ldff1sb, ld1sb, LSL); + ldff1_32_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_UXTW); + ldff1_32_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_SXTW); Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh; Ld1Macro ld1sh = &MacroAssembler::Ld1sh; - Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1sh, ld1sh, LSL); - Ldff1Helper(config, data, kHRegSize, kDRegSize, ldff1sh, ld1sh, LSL); + ldff1_32_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW); + ldff1_32_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW); +} + +static void sve_ldff1_scalar_plus_vector_32_unpacked_scaled_offset( + Test* config, uintptr_t data) { + auto ldff1_32_unpacked_scaled_offset_helper = + std::bind(&Ldff1Helper, + config, + data, + std::placeholders::_1, + kDRegSize, + CPURegister::kZRegister, + std::placeholders::_2, + std::placeholders::_3, + std::placeholders::_4, + true); + + Ld1Macro ldff1h = &MacroAssembler::Ldff1h; + Ld1Macro ld1h = &MacroAssembler::Ld1h; + ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW); + ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW); + + Ld1Macro ldff1w = &MacroAssembler::Ldff1w; + Ld1Macro ld1w = &MacroAssembler::Ld1w; + ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW); + ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW); + + Ld1Macro ldff1d = 
&MacroAssembler::Ldff1d; + Ld1Macro ld1d = &MacroAssembler::Ld1d; + ldff1_32_unpacked_scaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_UXTW); + ldff1_32_unpacked_scaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_SXTW); + + Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh; + Ld1Macro ld1sh = &MacroAssembler::Ld1sh; + ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW); + ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW); Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw; Ld1Macro ld1sw = &MacroAssembler::Ld1sw; - Ldff1Helper(config, data, kSRegSize, kDRegSize, ldff1sw, ld1sw, LSL); + ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_UXTW); + ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_SXTW); +} + +static void sve_ldff1_scalar_plus_vector_32_unpacked_unscaled_offset( + Test* config, uintptr_t data) { + auto ldff1_32_unpacked_unscaled_offset_helper = + std::bind(&Ldff1Helper, + config, + data, + std::placeholders::_1, + kDRegSize, + CPURegister::kZRegister, + std::placeholders::_2, + std::placeholders::_3, + std::placeholders::_4, + false); - munmap(reinterpret_cast<void*>(data), page_size * 2); + Ld1Macro ldff1b = &MacroAssembler::Ldff1b; + Ld1Macro ld1b = &MacroAssembler::Ld1b; + ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_UXTW); + ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_SXTW); + + Ld1Macro ldff1h = &MacroAssembler::Ldff1h; + Ld1Macro ld1h = &MacroAssembler::Ld1h; + ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW); + ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW); + + Ld1Macro ldff1w = &MacroAssembler::Ldff1w; + Ld1Macro ld1w = &MacroAssembler::Ld1w; + ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW); + ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW); + + Ld1Macro ldff1d = &MacroAssembler::Ldff1d; + Ld1Macro ld1d = 
&MacroAssembler::Ld1d; + ldff1_32_unpacked_unscaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_UXTW); + ldff1_32_unpacked_unscaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_SXTW); + + Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb; + Ld1Macro ld1sb = &MacroAssembler::Ld1sb; + ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_UXTW); + ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_SXTW); + + Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh; + Ld1Macro ld1sh = &MacroAssembler::Ld1sh; + ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW); + ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW); + + Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw; + Ld1Macro ld1sw = &MacroAssembler::Ld1sw; + ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_UXTW); + ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_SXTW); +} + +static void sve_ldff1_scalar_plus_vector_64_scaled_offset(Test* config, + uintptr_t data) { + auto ldff1_64_scaled_offset_helper = std::bind(&Ldff1Helper, + config, + data, + std::placeholders::_1, + kDRegSize, + CPURegister::kZRegister, + std::placeholders::_2, + std::placeholders::_3, + SVE_LSL, + true); + + Ld1Macro ldff1h = &MacroAssembler::Ldff1h; + Ld1Macro ld1h = &MacroAssembler::Ld1h; + ldff1_64_scaled_offset_helper(kHRegSize, ldff1h, ld1h); + + Ld1Macro ldff1w = &MacroAssembler::Ldff1w; + Ld1Macro ld1w = &MacroAssembler::Ld1w; + ldff1_64_scaled_offset_helper(kSRegSize, ldff1w, ld1w); + + Ld1Macro ldff1d = &MacroAssembler::Ldff1d; + Ld1Macro ld1d = &MacroAssembler::Ld1d; + ldff1_64_scaled_offset_helper(kDRegSize, ldff1d, ld1d); + + Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh; + Ld1Macro ld1sh = &MacroAssembler::Ld1sh; + ldff1_64_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh); + + Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw; + Ld1Macro ld1sw = &MacroAssembler::Ld1sw; + ldff1_64_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw); 
+} + +static void sve_ldff1_scalar_plus_vector_64_unscaled_offset(Test* config, + uintptr_t data) { + auto ldff1_64_unscaled_offset_helper = std::bind(&Ldff1Helper, + config, + data, + std::placeholders::_1, + kDRegSize, + CPURegister::kZRegister, + std::placeholders::_2, + std::placeholders::_3, + NO_SVE_OFFSET_MODIFIER, + false); + + Ld1Macro ldff1b = &MacroAssembler::Ldff1b; + Ld1Macro ld1b = &MacroAssembler::Ld1b; + ldff1_64_unscaled_offset_helper(kBRegSize, ldff1b, ld1b); + + Ld1Macro ldff1h = &MacroAssembler::Ldff1h; + Ld1Macro ld1h = &MacroAssembler::Ld1h; + ldff1_64_unscaled_offset_helper(kHRegSize, ldff1h, ld1h); + + Ld1Macro ldff1w = &MacroAssembler::Ldff1w; + Ld1Macro ld1w = &MacroAssembler::Ld1w; + ldff1_64_unscaled_offset_helper(kSRegSize, ldff1w, ld1w); + + Ld1Macro ldff1d = &MacroAssembler::Ldff1d; + Ld1Macro ld1d = &MacroAssembler::Ld1d; + ldff1_64_unscaled_offset_helper(kDRegSize, ldff1d, ld1d); + + Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb; + Ld1Macro ld1sb = &MacroAssembler::Ld1sb; + ldff1_64_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb); + + Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh; + Ld1Macro ld1sh = &MacroAssembler::Ld1sh; + ldff1_64_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh); + + Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw; + Ld1Macro ld1sw = &MacroAssembler::Ld1sw; + ldff1_64_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw); } TEST_SVE(sve_ldff1_scalar_plus_vector) { @@ -9080,38 +9356,12 @@ TEST_SVE(sve_ldff1_scalar_plus_vector) { memcpy(reinterpret_cast<void*>(data + i), &byte, 1); } - Ld1Macro ldff1b = &MacroAssembler::Ldff1b; - Ld1Macro ld1b = &MacroAssembler::Ld1b; - Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1b, ld1b, UXTW); - Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1b, ld1b, SXTW); - - Ld1Macro ldff1h = &MacroAssembler::Ldff1h; - Ld1Macro ld1h = &MacroAssembler::Ld1h; - Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1h, ld1h, UXTW); - Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1h, 
ld1h, SXTW); - Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1h, ld1h, UXTW, true); - Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1h, ld1h, SXTW, true); - - Ld1Macro ldff1w = &MacroAssembler::Ldff1w; - Ld1Macro ld1w = &MacroAssembler::Ld1w; - Ldff1Helper(config, data, kSRegSize, kSRegSize, ldff1w, ld1w, UXTW); - Ldff1Helper(config, data, kSRegSize, kSRegSize, ldff1w, ld1w, SXTW); - Ldff1Helper(config, data, kSRegSize, kSRegSize, ldff1w, ld1w, UXTW, true); - Ldff1Helper(config, data, kSRegSize, kSRegSize, ldff1w, ld1w, SXTW, true); - - Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb; - Ld1Macro ld1sb = &MacroAssembler::Ld1sb; - Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1sb, ld1sb, UXTW); - Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1sb, ld1sb, SXTW); - Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1sb, ld1sb, UXTW, true); - Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1sb, ld1sb, SXTW, true); - - Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh; - Ld1Macro ld1sh = &MacroAssembler::Ld1sh; - Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1sh, ld1sh, UXTW); - Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1sh, ld1sh, SXTW); - Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1sh, ld1sh, UXTW, true); - Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1sh, ld1sh, UXTW, true); + sve_ldff1_scalar_plus_vector_32_scaled_offset(config, data); + sve_ldff1_scalar_plus_vector_32_unscaled_offset(config, data); + sve_ldff1_scalar_plus_vector_32_unpacked_scaled_offset(config, data); + sve_ldff1_scalar_plus_vector_32_unpacked_unscaled_offset(config, data); + sve_ldff1_scalar_plus_vector_64_scaled_offset(config, data); + sve_ldff1_scalar_plus_vector_64_unscaled_offset(config, data); munmap(reinterpret_cast<void*>(data), page_size * 2); } diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc index 115a6c34..037cb67d 100644 --- a/test/aarch64/test-disasm-sve-aarch64.cc +++ 
b/test/aarch64/test-disasm-sve-aarch64.cc @@ -3589,30 +3589,6 @@ TEST(sve_mem_64bit_gather) { COMPARE_PREFIX(ld1w(z19.VnD(), p1.Zeroing(), x27, z4.VnD()), "ld1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]"); COMPARE_PREFIX(ld1w(z21.VnD(), p1.Zeroing(), x7, z8.VnD()), "ld1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]"); COMPARE_PREFIX(ld1w(z13.VnD(), p3.Zeroing(), x8, z10.VnD()), "ld1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]"); - COMPARE_PREFIX(ldff1b(z20.VnD(), p3.Zeroing(), x26, z11.VnD()), "ldff1b { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]"); - COMPARE_PREFIX(ldff1b(z8.VnD(), p6.Zeroing(), x18, z29.VnD()), "ldff1b { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]"); - COMPARE_PREFIX(ldff1d(z7.VnD(), p7.Zeroing(), x24, z10.VnD()), "ldff1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3]"); - COMPARE_PREFIX(ldff1d(z11.VnD(), p5.Zeroing(), x10, z21.VnD()), "ldff1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]"); - COMPARE_PREFIX(ldff1d(z25.VnD(), p0.Zeroing(), x21, z15.VnD()), "ldff1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3]"); - COMPARE_PREFIX(ldff1d(z15.VnD(), p7.Zeroing(), x26, z9.VnD()), "ldff1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]"); - COMPARE_PREFIX(ldff1h(z13.VnD(), p7.Zeroing(), x17, z17.VnD()), "ldff1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]"); - COMPARE_PREFIX(ldff1h(z26.VnD(), p7.Zeroing(), x26, z31.VnD()), "ldff1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]"); - COMPARE_PREFIX(ldff1h(z19.VnD(), p6.Zeroing(), x16, z12.VnD()), "ldff1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1]"); - COMPARE_PREFIX(ldff1h(z16.VnD(), p1.Zeroing(), x20, z10.VnD()), "ldff1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]"); - COMPARE_PREFIX(ldff1sb(z3.VnD(), p5.Zeroing(), x18, z3.VnD()), "ldff1sb { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]"); - COMPARE_PREFIX(ldff1sb(z31.VnD(), p3.Zeroing(), x13, z0.VnD()), "ldff1sb { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]"); - COMPARE_PREFIX(ldff1sh(z2.VnD(), p1.Zeroing(), x8, z8.VnD()), "ldff1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]"); - 
COMPARE_PREFIX(ldff1sh(z4.VnD(), p4.Zeroing(), x17, z18.VnD()), "ldff1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]"); - COMPARE_PREFIX(ldff1sh(z3.VnD(), p0.Zeroing(), x13, z22.VnD()), "ldff1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1]"); - COMPARE_PREFIX(ldff1sh(z7.VnD(), p3.Zeroing(), x16, z20.VnD()), "ldff1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]"); - COMPARE_PREFIX(ldff1sw(z29.VnD(), p0.Zeroing(), x5, z1.VnD()), "ldff1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]"); - COMPARE_PREFIX(ldff1sw(z26.VnD(), p3.Zeroing(), x0, z31.VnD()), "ldff1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]"); - COMPARE_PREFIX(ldff1sw(z25.VnD(), p5.Zeroing(), x4, z17.VnD()), "ldff1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]"); - COMPARE_PREFIX(ldff1sw(z7.VnD(), p6.Zeroing(), x12, z16.VnD()), "ldff1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]"); - COMPARE_PREFIX(ldff1w(z28.VnD(), p5.Zeroing(), x25, z28.VnD()), "ldff1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]"); - COMPARE_PREFIX(ldff1w(z20.VnD(), p4.Zeroing(), x30, z7.VnD()), "ldff1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]"); - COMPARE_PREFIX(ldff1w(z10.VnD(), p2.Zeroing(), x23, z25.VnD()), "ldff1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]"); - COMPARE_PREFIX(ldff1w(z4.VnD(), p1.Zeroing(), x8, z1.VnD()), "ldff1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]"); #endif CLEANUP(); @@ -3719,6 +3695,106 @@ TEST(sve_mem_prefetch) { CLEANUP(); } +TEST(sve_mem_64bit_gather_scalar_plus_vector) { + SETUP(); + + // 64-bit unscaled offset. 
+ COMPARE_PREFIX(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(x27, z24.VnD())), + "ldff1b { z18.d }, p6/z, [x27, z24.d]"); + COMPARE_PREFIX(ldff1h(z28.VnD(), p6.Zeroing(), SVEMemOperand(x1, z30.VnD())), + "ldff1h { z28.d }, p6/z, [x1, z30.d]"); + COMPARE_PREFIX(ldff1w(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())), + "ldff1w { z12.d }, p3/z, [x25, z27.d]"); + COMPARE_PREFIX(ldff1d(z23.VnD(), p5.Zeroing(), SVEMemOperand(x29, z31.VnD())), + "ldff1d { z23.d }, p5/z, [x29, z31.d]"); + COMPARE_PREFIX(ldff1sb(z15.VnD(), p5.Zeroing(), SVEMemOperand(x5, z14.VnD())), + "ldff1sb { z15.d }, p5/z, [x5, z14.d]"); + COMPARE_PREFIX(ldff1sh(z18.VnD(), + p4.Zeroing(), + SVEMemOperand(x25, z25.VnD())), + "ldff1sh { z18.d }, p4/z, [x25, z25.d]"); + COMPARE_PREFIX(ldff1sw(z12.VnD(), + p3.Zeroing(), + SVEMemOperand(x25, z27.VnD())), + "ldff1sw { z12.d }, p3/z, [x25, z27.d]"); + + // 64-bit scaled offset. + COMPARE_PREFIX(ldff1h(z25.VnD(), + p3.Zeroing(), + SVEMemOperand(x17, z15.VnD(), LSL, 1)), + "ldff1h { z25.d }, p3/z, [x17, z15.d, lsl #1]"); + COMPARE_PREFIX(ldff1w(z5.VnD(), + p4.Zeroing(), + SVEMemOperand(x23, z31.VnD(), LSL, 2)), + "ldff1w { z5.d }, p4/z, [x23, z31.d, lsl #2]"); + COMPARE_PREFIX(ldff1d(z2.VnD(), + p0.Zeroing(), + SVEMemOperand(sp, z7.VnD(), LSL, 3)), + "ldff1d { z2.d }, p0/z, [sp, z7.d, lsl #3]"); + COMPARE_PREFIX(ldff1sh(z10.VnD(), + p0.Zeroing(), + SVEMemOperand(x19, z15.VnD(), LSL, 1)), + "ldff1sh { z10.d }, p0/z, [x19, z15.d, lsl #1]"); + COMPARE_PREFIX(ldff1sw(z5.VnD(), + p4.Zeroing(), + SVEMemOperand(x23, z31.VnD(), LSL, 2)), + "ldff1sw { z5.d }, p4/z, [x23, z31.d, lsl #2]"); + + // 32-bit unpacked unscaled offset + COMPARE_PREFIX(ldff1b(z18.VnD(), + p6.Zeroing(), + SVEMemOperand(sp, z24.VnD(), UXTW)), + "ldff1b { z18.d }, p6/z, [sp, z24.d, uxtw]"); + COMPARE_PREFIX(ldff1h(z20.VnD(), + p5.Zeroing(), + SVEMemOperand(x7, z14.VnD(), SXTW)), + "ldff1h { z20.d }, p5/z, [x7, z14.d, sxtw]"); + COMPARE_PREFIX(ldff1w(z22.VnD(), + p4.Zeroing(), + 
SVEMemOperand(x17, z4.VnD(), UXTW)), + "ldff1w { z22.d }, p4/z, [x17, z4.d, uxtw]"); + COMPARE_PREFIX(ldff1d(z24.VnD(), + p3.Zeroing(), + SVEMemOperand(x3, z24.VnD(), SXTW)), + "ldff1d { z24.d }, p3/z, [x3, z24.d, sxtw]"); + COMPARE_PREFIX(ldff1sb(z26.VnD(), + p2.Zeroing(), + SVEMemOperand(x13, z14.VnD(), UXTW)), + "ldff1sb { z26.d }, p2/z, [x13, z14.d, uxtw]"); + COMPARE_PREFIX(ldff1sh(z28.VnD(), + p1.Zeroing(), + SVEMemOperand(x23, z4.VnD(), SXTW)), + "ldff1sh { z28.d }, p1/z, [x23, z4.d, sxtw]"); + COMPARE_PREFIX(ldff1sw(z30.VnD(), + p0.Zeroing(), + SVEMemOperand(x8, z24.VnD(), UXTW)), + "ldff1sw { z30.d }, p0/z, [x8, z24.d, uxtw]"); + + // 32-bit unpacked scaled offset + COMPARE_PREFIX(ldff1h(z4.VnD(), + p5.Zeroing(), + SVEMemOperand(x7, z1.VnD(), SXTW, 1)), + "ldff1h { z4.d }, p5/z, [x7, z1.d, sxtw #1]"); + COMPARE_PREFIX(ldff1w(z5.VnD(), + p4.Zeroing(), + SVEMemOperand(x17, z11.VnD(), UXTW, 2)), + "ldff1w { z5.d }, p4/z, [x17, z11.d, uxtw #2]"); + COMPARE_PREFIX(ldff1d(z6.VnD(), + p3.Zeroing(), + SVEMemOperand(x3, z31.VnD(), SXTW, 3)), + "ldff1d { z6.d }, p3/z, [x3, z31.d, sxtw #3]"); + COMPARE_PREFIX(ldff1sh(z7.VnD(), + p1.Zeroing(), + SVEMemOperand(x23, z7.VnD(), UXTW, 1)), + "ldff1sh { z7.d }, p1/z, [x23, z7.d, uxtw #1]"); + COMPARE_PREFIX(ldff1sw(z8.VnD(), + p0.Zeroing(), + SVEMemOperand(x8, z17.VnD(), SXTW, 2)), + "ldff1sw { z8.d }, p0/z, [x8, z17.d, sxtw #2]"); + + CLEANUP(); +} + TEST(sve_ld2_scalar_plus_immediate) { SETUP(); |