author    Martyn Capewell <martyn.capewell@arm.com>  2020-06-05 18:20:11 +0100
committer TatWai Chong <tatwai.chong@arm.com>        2020-06-22 12:51:34 -0700
commit    a5112344aa6a2c562379ec67398a6719360965bf (patch)
tree      4ea88d7d4a0701721cb5702f510d04af85fe8f66 /test
parent    cd3f6c5ec96ff6d8240a07e7084ae5de700dc9c7 (diff)
[sve] Complete remaining gather loads.
Implement the remaining 64-bit gather loads, including the unpacked, unscaled and scaled offset forms.

Change-Id: I208de1fabfe40f7095f9848c3ebf9de82a5f7416
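For orientation, here is a minimal standalone sketch (plain C++, not VIXL code; the function and variable names are invented for illustration) of how the gather addressing forms exercised by these tests compute a single lane's address: the per-lane offset is optionally zero- or sign-extended from 32 bits, then shifted by the scale, then added to the scalar base.

#include <cstdint>
#include <cstdio>

// Sketch only: one lane's effective address for [x, z.d, lsl #n],
// [x, z.d, uxtw #n] and [x, z.d, sxtw #n] style gather operands.
static uint64_t GatherLaneAddress(uint64_t base, uint64_t offset,
                                  bool is_32bit, bool is_signed,
                                  unsigned shift) {
  uint64_t off = offset;
  if (is_32bit) {
    // uxtw zero-extends the low 32 bits of the offset; sxtw sign-extends them.
    off = is_signed ? static_cast<uint64_t>(static_cast<int64_t>(
                          static_cast<int32_t>(offset & 0xffffffff)))
                    : (offset & 0xffffffff);
  }
  return base + (off << shift);  // The extended offset is scaled afterwards.
}

int main() {
  const uint64_t base = 0x1000;
  // 64-bit scaled offset, as in SVEMemOperand(x0, z.VnD(), LSL, 3).
  std::printf("lsl #3 : 0x%llx\n",
              (unsigned long long)GatherLaneAddress(base, 4, false, false, 3));
  // 32-bit unpacked unsigned offset, as in SVEMemOperand(x0, z.VnD(), UXTW, 1).
  std::printf("uxtw #1: 0x%llx\n",
              (unsigned long long)GatherLaneAddress(base, 0xfffffffe, true,
                                                    false, 1));
  // 32-bit unpacked signed offset, as in SVEMemOperand(x0, z.VnD(), SXTW).
  std::printf("sxtw   : 0x%llx\n",
              (unsigned long long)GatherLaneAddress(base, 0xfffffffe, true,
                                                    true, 0));
  return 0;
}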
Diffstat (limited to 'test')
-rw-r--r--  test/aarch64/test-assembler-sve-aarch64.cc  | 658
-rw-r--r--  test/aarch64/test-disasm-sve-aarch64.cc     | 115
2 files changed, 566 insertions, 207 deletions
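The assembler-test changes below template the test helpers on the offset-modifier type (Shift for the scalar-plus-scalar and 64-bit forms, Extend for the 32-bit forms) and partially apply the fixed arguments with std::bind, so each call site only supplies the sizes and load macros. A standalone sketch of that pattern, using made-up stand-in types rather than the real VIXL ones:

#include <cstdio>
#include <functional>

// Stand-ins for the modifier types; the real tests use vixl::aarch64::Shift
// and vixl::aarch64::Extend.
enum FakeShift { kNoShift, kLsl };
enum FakeExtend { kUxtw, kSxtw };

// A helper templated on the modifier type, mirroring Ldff1Helper<T>.
template <typename T>
static void Helper(unsigned msize_in_bits, unsigned esize_in_bits, T mod) {
  std::printf("msize=%u esize=%u mod=%d\n", msize_in_bits, esize_in_bits,
              static_cast<int>(mod));
}

int main() {
  // Fix the element size and modifier up front; call sites then pass only
  // the memory access size, as the ldff1_*_offset_helper bindings do below.
  auto helper_64 = std::bind(&Helper<FakeShift>, std::placeholders::_1,
                             64u, kLsl);
  auto helper_32 = std::bind(&Helper<FakeExtend>, std::placeholders::_1,
                             32u, kUxtw);
  helper_64(16u);
  helper_32(8u);
  return 0;
}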
diff --git a/test/aarch64/test-assembler-sve-aarch64.cc b/test/aarch64/test-assembler-sve-aarch64.cc
index 457f191b..991fcd4f 100644
--- a/test/aarch64/test-assembler-sve-aarch64.cc
+++ b/test/aarch64/test-assembler-sve-aarch64.cc
@@ -8842,6 +8842,7 @@ typedef void (MacroAssembler::*Ld1Macro)(const ZRegister& zt,
const PRegisterZ& pg,
const SVEMemOperand& addr);
+template <typename T>
static void Ldff1Helper(Test* config,
uintptr_t data,
unsigned msize_in_bits,
@@ -8849,7 +8850,7 @@ static void Ldff1Helper(Test* config,
CPURegister::RegisterType base_type,
Ld1Macro ldff1,
Ld1Macro ld1,
- SVEOffsetModifier mod,
+ T mod,
bool scale = false) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
START();
@@ -8901,30 +8902,33 @@ static void Ldff1Helper(Test* config,
if (base_type == CPURegister::kRegister) {
// Scalar-plus-scalar mode.
- if ((mod == SVE_LSL) || (mod == NO_SVE_OFFSET_MODIFIER)) {
- (masm.*ldff1)(z0.WithLaneSize(esize_in_bits),
- all.Zeroing(),
- SVEMemOperand(x20, x21, mod, msize_in_bytes_log2));
- } else {
- VIXL_UNIMPLEMENTED();
- }
-
- } else if (base_type == CPURegister::kZRegister) {
+ VIXL_ASSERT((std::is_same<T, vixl::aarch64::Shift>::value));
+ VIXL_ASSERT((static_cast<int>(mod) == LSL) ||
+ (static_cast<int>(mod) == NO_SHIFT));
+ (masm.*ldff1)(z0.WithLaneSize(esize_in_bits),
+ all.Zeroing(),
+ SVEMemOperand(x20, x21, mod, msize_in_bytes_log2));
+ } else {
+ VIXL_ASSERT(base_type == CPURegister::kZRegister);
int offs_size;
bool offs_is_unsigned;
- if ((mod == SVE_SXTW) || (mod == SVE_UXTW)) {
+ if (std::is_same<T, vixl::aarch64::Extend>::value) {
// Scalar-plus-vector mode with a 32-bit packed or unpacked, and unscaled or
// scaled offset.
+ VIXL_ASSERT((static_cast<int>(mod) == SXTW) ||
+ (static_cast<int>(mod) == UXTW));
if (scale == true) {
// Gather first-fault byte loads don't support scaled offsets.
VIXL_ASSERT(msize_in_bits != kBRegSize);
}
- offs_is_unsigned = (mod == SVE_UXTW) ? true : false;
+ offs_is_unsigned = (static_cast<int>(mod) == UXTW) ? true : false;
offs_size = kSRegSize;
} else {
// Scalar-plus-vector mode with 64-bit unscaled or scaled offset.
- VIXL_ASSERT((mod == SVE_LSL) || (mod == NO_SVE_OFFSET_MODIFIER));
+ VIXL_ASSERT((std::is_same<T, vixl::aarch64::Shift>::value));
+ VIXL_ASSERT((static_cast<int>(mod) == LSL) ||
+ (static_cast<int>(mod) == NO_SHIFT));
offs_is_unsigned = false;
offs_size = kDRegSize;
}
@@ -8970,8 +8974,6 @@ static void Ldff1Helper(Test* config,
ldff1)(z0.WithLaneSize(esize_in_bits),
all.Zeroing(),
SVEMemOperand(x19, z17.WithLaneSize(esize_in_bits), mod, shift));
- } else {
- VIXL_UNIMPLEMENTED();
}
__ Rdffrs(p0.VnB(), all.Zeroing());
@@ -9046,7 +9048,7 @@ TEST_SVE(sve_ldff1_scalar_plus_scalar) {
memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
}
- auto ldff1_unscaled_offset_helper = std::bind(&Ldff1Helper,
+ auto ldff1_unscaled_offset_helper = std::bind(&Ldff1Helper<Shift>,
config,
data,
std::placeholders::_1,
@@ -9054,7 +9056,7 @@ TEST_SVE(sve_ldff1_scalar_plus_scalar) {
CPURegister::kRegister,
std::placeholders::_3,
std::placeholders::_4,
- NO_SVE_OFFSET_MODIFIER,
+ NO_SHIFT,
false);
Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
@@ -9070,7 +9072,7 @@ TEST_SVE(sve_ldff1_scalar_plus_scalar) {
ldff1_unscaled_offset_helper(kBRegSize, kSRegSize, ldff1sb, ld1sb);
ldff1_unscaled_offset_helper(kBRegSize, kDRegSize, ldff1sb, ld1sb);
- auto ldff1_scaled_offset_helper = std::bind(&Ldff1Helper,
+ auto ldff1_scaled_offset_helper = std::bind(&Ldff1Helper<Shift>,
config,
data,
std::placeholders::_1,
@@ -9078,7 +9080,7 @@ TEST_SVE(sve_ldff1_scalar_plus_scalar) {
CPURegister::kRegister,
std::placeholders::_3,
std::placeholders::_4,
- SVE_LSL,
+ LSL,
true);
Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
@@ -9110,7 +9112,7 @@ TEST_SVE(sve_ldff1_scalar_plus_scalar) {
static void sve_ldff1_scalar_plus_vector_32_scaled_offset(Test* config,
uintptr_t data) {
- auto ldff1_32_scaled_offset_helper = std::bind(&Ldff1Helper,
+ auto ldff1_32_scaled_offset_helper = std::bind(&Ldff1Helper<Extend>,
config,
data,
std::placeholders::_1,
@@ -9122,23 +9124,23 @@ static void sve_ldff1_scalar_plus_vector_32_scaled_offset(Test* config,
true);
Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
Ld1Macro ld1h = &MacroAssembler::Ld1h;
- ldff1_32_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
- ldff1_32_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
+ ldff1_32_scaled_offset_helper(kHRegSize, ldff1h, ld1h, UXTW);
+ ldff1_32_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SXTW);
Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
Ld1Macro ld1w = &MacroAssembler::Ld1w;
- ldff1_32_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
- ldff1_32_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
+ ldff1_32_scaled_offset_helper(kSRegSize, ldff1w, ld1w, UXTW);
+ ldff1_32_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SXTW);
Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
- ldff1_32_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
- ldff1_32_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
+ ldff1_32_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, UXTW);
+ ldff1_32_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SXTW);
}
static void sve_ldff1_scalar_plus_vector_32_unscaled_offset(Test* config,
uintptr_t data) {
- auto ldff1_32_unscaled_offset_helper = std::bind(&Ldff1Helper,
+ auto ldff1_32_unscaled_offset_helper = std::bind(&Ldff1Helper<Extend>,
config,
data,
std::placeholders::_1,
@@ -9151,34 +9153,34 @@ static void sve_ldff1_scalar_plus_vector_32_unscaled_offset(Test* config,
Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
Ld1Macro ld1b = &MacroAssembler::Ld1b;
- ldff1_32_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_UXTW);
- ldff1_32_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_SXTW);
+ ldff1_32_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, UXTW);
+ ldff1_32_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SXTW);
Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
Ld1Macro ld1h = &MacroAssembler::Ld1h;
- ldff1_32_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
- ldff1_32_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
+ ldff1_32_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, UXTW);
+ ldff1_32_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SXTW);
Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
Ld1Macro ld1w = &MacroAssembler::Ld1w;
- ldff1_32_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
- ldff1_32_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
+ ldff1_32_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, UXTW);
+ ldff1_32_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SXTW);
Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
- ldff1_32_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_UXTW);
- ldff1_32_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_SXTW);
+ ldff1_32_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, UXTW);
+ ldff1_32_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SXTW);
Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
- ldff1_32_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
- ldff1_32_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
+ ldff1_32_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, UXTW);
+ ldff1_32_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SXTW);
}
static void sve_ldff1_scalar_plus_vector_32_unpacked_scaled_offset(
Test* config, uintptr_t data) {
auto ldff1_32_unpacked_scaled_offset_helper =
- std::bind(&Ldff1Helper,
+ std::bind(&Ldff1Helper<Extend>,
config,
data,
std::placeholders::_1,
@@ -9191,34 +9193,34 @@ static void sve_ldff1_scalar_plus_vector_32_unpacked_scaled_offset(
Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
Ld1Macro ld1h = &MacroAssembler::Ld1h;
- ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
- ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1h, ld1h, UXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SXTW);
Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
Ld1Macro ld1w = &MacroAssembler::Ld1w;
- ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
- ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1w, ld1w, UXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SXTW);
Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
Ld1Macro ld1d = &MacroAssembler::Ld1d;
- ldff1_32_unpacked_scaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_UXTW);
- ldff1_32_unpacked_scaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_SXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kDRegSize, ldff1d, ld1d, UXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kDRegSize, ldff1d, ld1d, SXTW);
Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
- ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
- ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, UXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SXTW);
Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
- ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_UXTW);
- ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_SXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw, UXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SXTW);
}
static void sve_ldff1_scalar_plus_vector_32_unpacked_unscaled_offset(
Test* config, uintptr_t data) {
auto ldff1_32_unpacked_unscaled_offset_helper =
- std::bind(&Ldff1Helper,
+ std::bind(&Ldff1Helper<Extend>,
config,
data,
std::placeholders::_1,
@@ -9231,43 +9233,43 @@ static void sve_ldff1_scalar_plus_vector_32_unpacked_unscaled_offset(
Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
Ld1Macro ld1b = &MacroAssembler::Ld1b;
- ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_UXTW);
- ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_SXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SXTW);
Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
Ld1Macro ld1h = &MacroAssembler::Ld1h;
- ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
- ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SXTW);
Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
Ld1Macro ld1w = &MacroAssembler::Ld1w;
- ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
- ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SXTW);
Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
Ld1Macro ld1d = &MacroAssembler::Ld1d;
- ldff1_32_unpacked_unscaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_UXTW);
- ldff1_32_unpacked_unscaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_SXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kDRegSize, ldff1d, ld1d, UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kDRegSize, ldff1d, ld1d, SXTW);
Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
- ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_UXTW);
- ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_SXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SXTW);
Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
- ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
- ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SXTW);
Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
- ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_UXTW);
- ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_SXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw, UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SXTW);
}
static void sve_ldff1_scalar_plus_vector_64_scaled_offset(Test* config,
uintptr_t data) {
- auto ldff1_64_scaled_offset_helper = std::bind(&Ldff1Helper,
+ auto ldff1_64_scaled_offset_helper = std::bind(&Ldff1Helper<Shift>,
config,
data,
std::placeholders::_1,
@@ -9275,7 +9277,7 @@ static void sve_ldff1_scalar_plus_vector_64_scaled_offset(Test* config,
CPURegister::kZRegister,
std::placeholders::_2,
std::placeholders::_3,
- SVE_LSL,
+ LSL,
true);
Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
@@ -9301,7 +9303,7 @@ static void sve_ldff1_scalar_plus_vector_64_scaled_offset(Test* config,
static void sve_ldff1_scalar_plus_vector_64_unscaled_offset(Test* config,
uintptr_t data) {
- auto ldff1_64_unscaled_offset_helper = std::bind(&Ldff1Helper,
+ auto ldff1_64_unscaled_offset_helper = std::bind(&Ldff1Helper<Shift>,
config,
data,
std::placeholders::_1,
@@ -9309,7 +9311,7 @@ static void sve_ldff1_scalar_plus_vector_64_unscaled_offset(Test* config,
CPURegister::kZRegister,
std::placeholders::_2,
std::placeholders::_3,
- NO_SVE_OFFSET_MODIFIER,
+ NO_SHIFT,
false);
Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
@@ -9674,13 +9676,195 @@ TEST_SVE(sve_ldff1_regression_test) {
}
}
+// Emphasise testing that the modifiers are propagated and simulated correctly.
+TEST_SVE(sve_ld1_regression_test) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
+ START();
+
+ size_t page_size = sysconf(_SC_PAGE_SIZE);
+ VIXL_ASSERT(page_size > static_cast<size_t>(config->sve_vl_in_bytes()));
+
+ uintptr_t data = reinterpret_cast<uintptr_t>(mmap(NULL,
+ page_size * 2,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1,
+ 0));
+ uintptr_t middle = data + page_size;
+ // Fill the accessible page with arbitrary data.
+ for (size_t i = 0; i < page_size; i++) {
+ // Reverse bits so we get a mixture of positive and negative values.
+ uint8_t byte = ReverseBits(static_cast<uint8_t>(i));
+ memcpy(reinterpret_cast<void*>(middle + i), &byte, 1);
+ // Make every byte differ by roughly one bit, and copy the bytes in the
+ // reverse direction so that it is convenient to verify loads at negative
+ // indexes.
+ byte += 1;
+ memcpy(reinterpret_cast<void*>(middle - i), &byte, 1);
+ }
+
+ PRegister all = p6;
+ __ Ptrue(all.VnB());
+
+ __ Mov(x0, middle);
+ __ Index(z31.VnS(), 0, 3);
+ __ Neg(z30.VnS(), z31.VnS());
+
+ // Scalar plus vector 32 unscaled offset
+ __ Ld1b(z1.VnS(), all.Zeroing(), SVEMemOperand(x0, z31.VnS(), UXTW));
+ __ Ld1h(z2.VnS(), all.Zeroing(), SVEMemOperand(x0, z30.VnS(), SXTW));
+ __ Ld1w(z3.VnS(), all.Zeroing(), SVEMemOperand(x0, z31.VnS(), UXTW));
+ __ Ld1sb(z4.VnS(), all.Zeroing(), SVEMemOperand(x0, z30.VnS(), SXTW));
+ __ Ld1sh(z5.VnS(), all.Zeroing(), SVEMemOperand(x0, z31.VnS(), UXTW));
+
+ // Scalar plus vector 32 scaled offset
+ __ Ld1h(z6.VnS(), all.Zeroing(), SVEMemOperand(x0, z31.VnS(), UXTW, 1));
+ __ Ld1w(z7.VnS(), all.Zeroing(), SVEMemOperand(x0, z31.VnS(), UXTW, 2));
+ __ Ld1sh(z8.VnS(), all.Zeroing(), SVEMemOperand(x0, z30.VnS(), SXTW, 1));
+
+ __ Index(z31.VnD(), 0, 3);
+ __ Neg(z30.VnD(), z31.VnD());
+
+ // Ensure only the low 32 bits are used for the tests with positive index
+ // values. This also tests that the indexes are treated as positive in
+ // `uxtw` form.
+ __ Mov(x3, 0x8000000080000000);
+ __ Dup(z28.VnD(), x3);
+ __ Sub(x2, x0, 0x80000000);
+ __ Add(z29.VnD(), z31.VnD(), z28.VnD());
+
+ // Scalar plus vector 32 unpacked unscaled offset
+ __ Ld1b(z9.VnD(), all.Zeroing(), SVEMemOperand(x2, z29.VnD(), UXTW));
+ __ Ld1h(z10.VnD(), all.Zeroing(), SVEMemOperand(x0, z30.VnD(), SXTW));
+ __ Ld1w(z11.VnD(), all.Zeroing(), SVEMemOperand(x2, z29.VnD(), UXTW));
+ __ Ld1sb(z12.VnD(), all.Zeroing(), SVEMemOperand(x2, z29.VnD(), UXTW));
+ __ Ld1sh(z13.VnD(), all.Zeroing(), SVEMemOperand(x0, z30.VnD(), SXTW));
+ __ Ld1sw(z14.VnD(), all.Zeroing(), SVEMemOperand(x2, z29.VnD(), UXTW));
+
+ // Scalar plus vector 32 unpacked scaled offset
+ __ Ld1h(z15.VnD(), all.Zeroing(), SVEMemOperand(x0, z30.VnD(), SXTW, 1));
+ __ Ld1w(z16.VnD(), all.Zeroing(), SVEMemOperand(x0, z31.VnD(), UXTW, 2));
+ __ Ld1d(z17.VnD(), all.Zeroing(), SVEMemOperand(x0, z30.VnD(), SXTW, 3));
+ __ Ld1sh(z18.VnD(), all.Zeroing(), SVEMemOperand(x0, z30.VnD(), SXTW, 1));
+ __ Ld1sw(z19.VnD(), all.Zeroing(), SVEMemOperand(x0, z31.VnD(), UXTW, 2));
+
+ __ Sub(x0, x0, x3);
+ // Note that `0x8000000080000000` has been added to the positive indexes. The
+ // wrong address will be accessed if the offset is treated as negative.
+
+ // Scalar plus vector 64 unscaled offset
+ __ Ld1b(z20.VnD(), all.Zeroing(), SVEMemOperand(x0, z29.VnD()));
+ __ Ld1h(z21.VnD(), all.Zeroing(), SVEMemOperand(x0, z29.VnD()));
+ __ Ld1w(z22.VnD(), all.Zeroing(), SVEMemOperand(x0, z29.VnD()));
+ __ Ld1sh(z23.VnD(), all.Zeroing(), SVEMemOperand(x0, z29.VnD()));
+ __ Ld1sw(z24.VnD(), all.Zeroing(), SVEMemOperand(x0, z29.VnD()));
+
+ // Scalar plus vector 64 scaled offset
+ __ Lsr(z29.VnD(), z28.VnD(), 1); // Shift right to 0x4000000040000000
+ __ Add(z30.VnD(), z31.VnD(), z29.VnD());
+ __ Ld1h(z25.VnD(), all.Zeroing(), SVEMemOperand(x0, z30.VnD(), LSL, 1));
+ __ Ld1sh(z26.VnD(), all.Zeroing(), SVEMemOperand(x0, z30.VnD(), LSL, 1));
+
+ __ Lsr(z29.VnD(), z29.VnD(), 1); // Shift right to 0x2000000020000000
+ __ Add(z30.VnD(), z31.VnD(), z29.VnD());
+ __ Ld1w(z27.VnD(), all.Zeroing(), SVEMemOperand(x0, z30.VnD(), LSL, 2));
+ __ Ld1sw(z28.VnD(), all.Zeroing(), SVEMemOperand(x0, z30.VnD(), LSL, 2));
+
+ __ Lsr(z29.VnD(), z29.VnD(), 1); // Shift right to 0x1000000010000000
+ __ Add(z30.VnD(), z31.VnD(), z29.VnD());
+ __ Ld1d(z29.VnD(), all.Zeroing(), SVEMemOperand(x0, z30.VnD(), LSL, 3));
+
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+
+ // Scalar plus vector 32 unscaled offset
+ uint32_t expected_z1[] = {0x00000090, 0x00000060, 0x000000c0, 0x00000001};
+ uint32_t expected_z2[] = {0x00001191, 0x0000a161, 0x000041c1, 0x00008001};
+ uint32_t expected_z3[] = {0x30d05090, 0x9010e060, 0x60a020c0, 0xc0408001};
+ uint32_t expected_z4[] = {0xffffff91, 0x00000061, 0xffffffc1, 0x00000001};
+ uint32_t expected_z5[] = {0x00005090, 0xffffe060, 0x000020c0, 0xffff8001};
+
+ ASSERT_EQUAL_SVE(expected_z1, z1.VnS());
+ ASSERT_EQUAL_SVE(expected_z2, z2.VnS());
+ ASSERT_EQUAL_SVE(expected_z3, z3.VnS());
+ ASSERT_EQUAL_SVE(expected_z4, z4.VnS());
+ ASSERT_EQUAL_SVE(expected_z5, z5.VnS());
+
+ // Scalar plus vector 32 scaled offset
+ uint32_t expected_z6[] = {0x0000c848, 0x0000b030, 0x0000e060, 0x00008001};
+ uint32_t expected_z7[] = {0xe464a424, 0xd8589818, 0xf070b030, 0xc0408001};
+ uint32_t expected_z8[] = {0xffff8949, 0xffffd131, 0xffffa161, 0xffff8001};
+
+ ASSERT_EQUAL_SVE(expected_z6, z6.VnS());
+ ASSERT_EQUAL_SVE(expected_z7, z7.VnS());
+ ASSERT_EQUAL_SVE(expected_z8, z8.VnS());
+
+ // Scalar plus vector 32 unpacked unscaled offset
+ uint64_t expected_z9[] = {0x00000000000000c0, 0x0000000000000001};
+ uint64_t expected_z10[] = {0x00000000000041c1, 0x0000000000008001};
+ uint64_t expected_z11[] = {0x0000000060a020c0, 0x00000000c0408001};
+ uint64_t expected_z12[] = {0xffffffffffffffc0, 0x0000000000000001};
+ uint64_t expected_z13[] = {0x00000000000041c1, 0xffffffffffff8001};
+ uint64_t expected_z14[] = {0x0000000060a020c0, 0xffffffffc0408001};
+
+ ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
+ ASSERT_EQUAL_SVE(expected_z10, z10.VnD());
+ ASSERT_EQUAL_SVE(expected_z11, z11.VnD());
+ ASSERT_EQUAL_SVE(expected_z12, z12.VnD());
+ ASSERT_EQUAL_SVE(expected_z13, z13.VnD());
+ ASSERT_EQUAL_SVE(expected_z14, z14.VnD());
+
+ // Scalar plus vector 32 unpacked scaled offset
+ uint64_t expected_z15[] = {0x000000000000a161, 0x0000000000008001};
+ uint64_t expected_z16[] = {0x00000000f070b030, 0x00000000c0408001};
+ uint64_t expected_z17[] = {0x8949c929a969e919, 0xe060a020c0408001};
+ uint64_t expected_z18[] = {0xffffffffffffa161, 0xffffffffffff8001};
+ uint64_t expected_z19[] = {0xfffffffff070b030, 0xffffffffc0408001};
+
+ ASSERT_EQUAL_SVE(expected_z15, z15.VnD());
+ ASSERT_EQUAL_SVE(expected_z16, z16.VnD());
+ ASSERT_EQUAL_SVE(expected_z17, z17.VnD());
+ ASSERT_EQUAL_SVE(expected_z18, z18.VnD());
+ ASSERT_EQUAL_SVE(expected_z19, z19.VnD());
+
+ // Scalar plus vector 64 unscaled offset
+ uint64_t expected_z20[] = {0x00000000000000c0, 0x0000000000000001};
+ uint64_t expected_z21[] = {0x00000000000020c0, 0x0000000000008001};
+ uint64_t expected_z22[] = {0x0000000060a020c0, 0x00000000c0408001};
+ uint64_t expected_z23[] = {0x00000000000020c0, 0xffffffffffff8001};
+ uint64_t expected_z24[] = {0x0000000060a020c0, 0xffffffffc0408001};
+
+ ASSERT_EQUAL_SVE(expected_z20, z20.VnD());
+ ASSERT_EQUAL_SVE(expected_z21, z21.VnD());
+ ASSERT_EQUAL_SVE(expected_z22, z22.VnD());
+ ASSERT_EQUAL_SVE(expected_z23, z23.VnD());
+ ASSERT_EQUAL_SVE(expected_z24, z24.VnD());
+
+ uint64_t expected_z25[] = {0x000000000000e060, 0x0000000000008001};
+ uint64_t expected_z26[] = {0xffffffffffffe060, 0xffffffffffff8001};
+ uint64_t expected_z27[] = {0x00000000f070b030, 0x00000000c0408001};
+ uint64_t expected_z28[] = {0xfffffffff070b030, 0xffffffffc0408001};
+ uint64_t expected_z29[] = {0xf878b838d8589818, 0xe060a020c0408001};
+
+ // Scalar plus vector 64 scaled offset
+ ASSERT_EQUAL_SVE(expected_z25, z25.VnD());
+ ASSERT_EQUAL_SVE(expected_z26, z26.VnD());
+ ASSERT_EQUAL_SVE(expected_z27, z27.VnD());
+ ASSERT_EQUAL_SVE(expected_z28, z28.VnD());
+ ASSERT_EQUAL_SVE(expected_z29, z29.VnD());
+ }
+}
+
// Test gather loads by comparing them with the result of a set of equivalent
// scalar loads.
+template <typename T>
static void GatherLoadScalarPlusVectorHelper(Test* config,
unsigned msize_in_bits,
unsigned esize_in_bits,
Ld1Macro ld1,
Ld1Macro ldff1,
+ T mod,
bool is_signed,
bool is_scaled) {
// SVE supports 32- and 64-bit addressing for gather loads.
@@ -9710,10 +9894,10 @@ static void GatherLoadScalarPlusVectorHelper(Test* config,
ZRegister zn = z0.WithLaneSize(esize_in_bits);
ZRegister zt_ref = z1.WithLaneSize(esize_in_bits);
- ZRegister zt_ux = z2.WithLaneSize(esize_in_bits);
- ZRegister zt_sx = z3.WithLaneSize(esize_in_bits);
- ZRegister zt_ff_ux = z4.WithLaneSize(esize_in_bits);
- ZRegister zt_ff_sx = z5.WithLaneSize(esize_in_bits);
+ ZRegister zt = z2.WithLaneSize(esize_in_bits);
+ ZRegister zt_ff = z3.WithLaneSize(esize_in_bits);
+ PRegisterWithLaneSize pg_ff = p1.WithLaneSize(esize_in_bits);
+ PRegisterWithLaneSize pg_diff = p2.WithLaneSize(esize_in_bits);
int shift = 0;
if (is_scaled) {
@@ -9755,42 +9939,29 @@ static void GatherLoadScalarPlusVectorHelper(Test* config,
__ Lsr(zn, zn, shift);
}
- // TODO: Also test 64 bit scalar-plus-vector SVEMemOperands.
- VIXL_ASSERT(esize_in_bits == kSRegSize);
- (masm.*ld1)(zt_ux, pg, SVEMemOperand(x0, zn, UXTW, shift));
- (masm.*ld1)(zt_sx, pg, SVEMemOperand(x0, zn, SXTW, shift));
+ (masm.*ld1)(zt, pg, SVEMemOperand(x0, zn, mod, shift));
Register ffr_check_count = x17;
__ Mov(ffr_check_count, 0);
- // Compare these two vector register and place the different to
- // `ffr_check_count`.
- auto ffr_check = [&](auto zt_ref, auto zt) {
- PRegisterWithLaneSize pg_ff = p1.WithLaneSize(esize_in_bits);
- PRegisterWithLaneSize pg_diff = p2.WithLaneSize(esize_in_bits);
-
- masm.Rdffrs(pg_ff.VnB(), all.Zeroing());
- masm.Cmpeq(pg_diff, all.Zeroing(), zt_ref, zt);
- masm.Eor(pg_diff.VnB(), all.Zeroing(), pg_diff.VnB(), pg_ff.VnB());
- masm.Cntp(x12, all, pg_diff);
- masm.Add(ffr_check_count, ffr_check_count, x12);
- };
-
// Test the correctness of the data gathered from different addresses. The
// first-fault behaviour is tested more thoroughly in `Ldff1Helper`.
__ Setffr();
- (masm.*ldff1)(zt_ff_ux, pg, SVEMemOperand(x0, zn, UXTW, shift));
- ffr_check(zt_ref, zt_ff_ux);
- (masm.*ldff1)(zt_ff_sx, pg, SVEMemOperand(x0, zn, SXTW, shift));
- ffr_check(zt_ref, zt_ff_sx);
+ (masm.*ldff1)(zt_ff, pg, SVEMemOperand(x0, zn, mod, shift));
+
+ // Compare these two vector registers and accumulate the number of
+ // differing lanes into `ffr_check_count`.
+ __ Rdffrs(pg_ff.VnB(), all.Zeroing());
+ __ Cmpeq(pg_diff, all.Zeroing(), zt_ref, zt_ff);
+ __ Eor(pg_diff.VnB(), all.Zeroing(), pg_diff.VnB(), pg_ff.VnB());
+ __ Incp(ffr_check_count, pg_diff);
END();
if (CAN_RUN()) {
RUN();
- ASSERT_EQUAL_SVE(zt_ref, zt_ux);
- ASSERT_EQUAL_SVE(zt_ref, zt_sx);
+ ASSERT_EQUAL_SVE(zt_ref, zt);
ASSERT_EQUAL_64(0, ffr_check_count);
}
@@ -9987,91 +10158,220 @@ TEST_SVE(sve_ld1sh_32bit_vector_plus_immediate) {
true);
}
-TEST_SVE(sve_ld1b_32bit_scalar_plus_vector) {
- bool is_signed = false;
- bool is_scaled = false;
- GatherLoadScalarPlusVectorHelper(config,
- kBRegSize,
- kSRegSize,
- &MacroAssembler::Ld1b,
- &MacroAssembler::Ldff1b,
- is_signed,
- is_scaled);
-}
-
-TEST_SVE(sve_ld1h_32bit_scalar_plus_vector) {
- bool is_signed = false;
- bool is_scaled = false;
- GatherLoadScalarPlusVectorHelper(config,
- kHRegSize,
- kSRegSize,
- &MacroAssembler::Ld1h,
- &MacroAssembler::Ldff1h,
- is_signed,
- is_scaled);
-
- is_scaled = true;
- GatherLoadScalarPlusVectorHelper(config,
- kHRegSize,
- kSRegSize,
- &MacroAssembler::Ld1h,
- &MacroAssembler::Ldff1h,
- is_signed,
- is_scaled);
-}
-
-TEST_SVE(sve_ld1w_32bit_scalar_plus_vector) {
- bool is_signed = false;
- bool is_scaled = false;
- GatherLoadScalarPlusVectorHelper(config,
- kSRegSize,
- kSRegSize,
- &MacroAssembler::Ld1w,
- &MacroAssembler::Ldff1w,
- is_signed,
- is_scaled);
-
- is_scaled = true;
- GatherLoadScalarPlusVectorHelper(config,
- kSRegSize,
- kSRegSize,
- &MacroAssembler::Ld1w,
- &MacroAssembler::Ldff1w,
- is_signed,
- is_scaled);
-}
-
-TEST_SVE(sve_ld1sb_32bit_scalar_plus_vector) {
- bool is_signed = true;
- bool is_scaled = false;
- GatherLoadScalarPlusVectorHelper(config,
- kBRegSize,
- kSRegSize,
- &MacroAssembler::Ld1sb,
- &MacroAssembler::Ldff1sb,
- is_signed,
- is_scaled);
-}
-
-TEST_SVE(sve_ld1sh_32bit_scalar_plus_vector) {
- bool is_signed = true;
- bool is_scaled = false;
- GatherLoadScalarPlusVectorHelper(config,
- kHRegSize,
- kSRegSize,
- &MacroAssembler::Ld1sh,
- &MacroAssembler::Ldff1sh,
- is_signed,
- is_scaled);
-
- is_scaled = true;
- GatherLoadScalarPlusVectorHelper(config,
- kHRegSize,
- kSRegSize,
- &MacroAssembler::Ld1sh,
- &MacroAssembler::Ldff1sh,
- is_signed,
- is_scaled);
+TEST_SVE(sve_ld1_scalar_plus_vector_32_scaled_offset) {
+ auto ld1_32_scaled_offset_helper =
+ std::bind(&GatherLoadScalarPlusVectorHelper<Extend>,
+ config,
+ std::placeholders::_1,
+ kSRegSize,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ std::placeholders::_4,
+ std::placeholders::_5,
+ true);
+
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ ld1_32_scaled_offset_helper(kHRegSize, ld1h, ldff1h, UXTW, false);
+ ld1_32_scaled_offset_helper(kHRegSize, ld1h, ldff1h, SXTW, false);
+
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ ld1_32_scaled_offset_helper(kSRegSize, ld1w, ldff1w, UXTW, false);
+ ld1_32_scaled_offset_helper(kSRegSize, ld1w, ldff1w, SXTW, false);
+
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ ld1_32_scaled_offset_helper(kHRegSize, ld1sh, ldff1sh, UXTW, true);
+ ld1_32_scaled_offset_helper(kHRegSize, ld1sh, ldff1sh, SXTW, true);
+}
+
+TEST_SVE(sve_ld1_scalar_plus_vector_32_unscaled_offset) {
+ auto ld1_32_unscaled_offset_helper =
+ std::bind(&GatherLoadScalarPlusVectorHelper<Extend>,
+ config,
+ std::placeholders::_1,
+ kSRegSize,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ std::placeholders::_4,
+ std::placeholders::_5,
+ false);
+
+ Ld1Macro ld1b = &MacroAssembler::Ld1b;
+ Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
+ ld1_32_unscaled_offset_helper(kBRegSize, ld1b, ldff1b, UXTW, false);
+ ld1_32_unscaled_offset_helper(kBRegSize, ld1b, ldff1b, SXTW, false);
+
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ ld1_32_unscaled_offset_helper(kHRegSize, ld1h, ldff1h, UXTW, false);
+ ld1_32_unscaled_offset_helper(kHRegSize, ld1h, ldff1h, SXTW, false);
+
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ ld1_32_unscaled_offset_helper(kSRegSize, ld1w, ldff1w, UXTW, false);
+ ld1_32_unscaled_offset_helper(kSRegSize, ld1w, ldff1w, SXTW, false);
+
+ Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
+ Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
+ ld1_32_unscaled_offset_helper(kBRegSize, ld1sb, ldff1sb, UXTW, true);
+ ld1_32_unscaled_offset_helper(kBRegSize, ld1sb, ldff1sb, SXTW, true);
+
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ ld1_32_unscaled_offset_helper(kHRegSize, ld1sh, ldff1sh, UXTW, true);
+ ld1_32_unscaled_offset_helper(kHRegSize, ld1sh, ldff1sh, SXTW, true);
+}
+
+TEST_SVE(sve_ld1_scalar_plus_vector_32_unpacked_scaled_offset) {
+ auto ld1_32_unpacked_scaled_offset_helper =
+ std::bind(&GatherLoadScalarPlusVectorHelper<Extend>,
+ config,
+ std::placeholders::_1,
+ kDRegSize,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ std::placeholders::_4,
+ std::placeholders::_5,
+ true);
+
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ ld1_32_unpacked_scaled_offset_helper(kHRegSize, ld1h, ldff1h, UXTW, false);
+ ld1_32_unpacked_scaled_offset_helper(kHRegSize, ld1h, ldff1h, SXTW, false);
+
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ ld1_32_unpacked_scaled_offset_helper(kSRegSize, ld1w, ldff1w, UXTW, false);
+ ld1_32_unpacked_scaled_offset_helper(kSRegSize, ld1w, ldff1w, SXTW, false);
+
+ Ld1Macro ld1d = &MacroAssembler::Ld1d;
+ Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
+ ld1_32_unpacked_scaled_offset_helper(kDRegSize, ld1d, ldff1d, UXTW, false);
+ ld1_32_unpacked_scaled_offset_helper(kDRegSize, ld1d, ldff1d, SXTW, false);
+
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ ld1_32_unpacked_scaled_offset_helper(kHRegSize, ld1sh, ldff1sh, UXTW, true);
+ ld1_32_unpacked_scaled_offset_helper(kHRegSize, ld1sh, ldff1sh, SXTW, true);
+
+ Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
+ Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
+ ld1_32_unpacked_scaled_offset_helper(kSRegSize, ld1sw, ldff1sw, UXTW, true);
+ ld1_32_unpacked_scaled_offset_helper(kSRegSize, ld1sw, ldff1sw, SXTW, true);
+}
+
+TEST_SVE(sve_ld1_scalar_plus_vector_32_unpacked_unscaled_offset) {
+ auto ld1_32_unpacked_unscaled_offset_helper =
+ std::bind(&GatherLoadScalarPlusVectorHelper<Extend>,
+ config,
+ std::placeholders::_1,
+ kDRegSize,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ std::placeholders::_4,
+ std::placeholders::_5,
+ false);
+
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ ld1_32_unpacked_unscaled_offset_helper(kHRegSize, ld1h, ldff1h, UXTW, false);
+ ld1_32_unpacked_unscaled_offset_helper(kHRegSize, ld1h, ldff1h, SXTW, false);
+
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ ld1_32_unpacked_unscaled_offset_helper(kSRegSize, ld1w, ldff1w, UXTW, false);
+ ld1_32_unpacked_unscaled_offset_helper(kSRegSize, ld1w, ldff1w, SXTW, false);
+
+ Ld1Macro ld1d = &MacroAssembler::Ld1d;
+ Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
+ ld1_32_unpacked_unscaled_offset_helper(kDRegSize, ld1d, ldff1d, UXTW, false);
+ ld1_32_unpacked_unscaled_offset_helper(kDRegSize, ld1d, ldff1d, SXTW, false);
+
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ ld1_32_unpacked_unscaled_offset_helper(kHRegSize, ld1sh, ldff1sh, UXTW, true);
+ ld1_32_unpacked_unscaled_offset_helper(kHRegSize, ld1sh, ldff1sh, SXTW, true);
+
+ Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
+ Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
+ ld1_32_unpacked_unscaled_offset_helper(kSRegSize, ld1sw, ldff1sw, UXTW, true);
+ ld1_32_unpacked_unscaled_offset_helper(kSRegSize, ld1sw, ldff1sw, SXTW, true);
+}
+
+TEST_SVE(sve_ld1_scalar_plus_vector_64_scaled_offset) {
+ auto ld1_64_scaled_offset_helper =
+ std::bind(&GatherLoadScalarPlusVectorHelper<Shift>,
+ config,
+ std::placeholders::_1,
+ kDRegSize,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ LSL,
+ std::placeholders::_4,
+ true);
+
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ ld1_64_scaled_offset_helper(kHRegSize, ld1h, ldff1h, false);
+
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ ld1_64_scaled_offset_helper(kSRegSize, ld1w, ldff1w, false);
+
+ Ld1Macro ld1d = &MacroAssembler::Ld1d;
+ Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
+ ld1_64_scaled_offset_helper(kDRegSize, ld1d, ldff1d, false);
+
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ ld1_64_scaled_offset_helper(kHRegSize, ld1sh, ldff1sh, true);
+
+ Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
+ Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
+ ld1_64_scaled_offset_helper(kSRegSize, ld1sw, ldff1sw, true);
+}
+
+TEST_SVE(sve_ld1_scalar_plus_vector_64_unscaled_offset) {
+ auto ld1_64_unscaled_offset_helper =
+ std::bind(&GatherLoadScalarPlusVectorHelper<Shift>,
+ config,
+ std::placeholders::_1,
+ kDRegSize,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ NO_SHIFT,
+ std::placeholders::_4,
+ false);
+
+ Ld1Macro ld1b = &MacroAssembler::Ld1b;
+ Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
+ ld1_64_unscaled_offset_helper(kBRegSize, ld1b, ldff1b, false);
+
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ ld1_64_unscaled_offset_helper(kHRegSize, ld1h, ldff1h, false);
+
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ ld1_64_unscaled_offset_helper(kSRegSize, ld1w, ldff1w, false);
+
+ Ld1Macro ld1d = &MacroAssembler::Ld1d;
+ Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
+ ld1_64_unscaled_offset_helper(kDRegSize, ld1d, ldff1d, false);
+
+ Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
+ Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
+ ld1_64_unscaled_offset_helper(kBRegSize, ld1sb, ldff1sb, true);
+
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ ld1_64_unscaled_offset_helper(kHRegSize, ld1sh, ldff1sh, true);
+
+ Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
+ Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
+ ld1_64_unscaled_offset_helper(kSRegSize, ld1sw, ldff1sw, true);
}
TEST_SVE(sve_ldnt1) {
diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc
index 9beb7744..fa0a4891 100644
--- a/test/aarch64/test-disasm-sve-aarch64.cc
+++ b/test/aarch64/test-disasm-sve-aarch64.cc
@@ -3616,35 +3616,94 @@ TEST(sve_mem_64bit_gather_vector_plus_immediate_macro) {
CLEANUP();
}
-TEST(sve_mem_64bit_gather) {
+TEST(sve_mem_64bit_gather_scalar_plus_vector) {
SETUP();
-#if 0
- COMPARE_PREFIX(ld1b(z30.VnD(), p6.Zeroing(), x10, z24.VnD()), "ld1b { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ld1b(z19.VnD(), p5.Zeroing(), x21, z29.VnD()), "ld1b { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ld1d(z20.VnD(), p3.Zeroing(), x3, z15.VnD()), "ld1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3]");
- COMPARE_PREFIX(ld1d(z18.VnD(), p5.Zeroing(), x11, z11.VnD()), "ld1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ld1d(z25.VnD(), p3.Zeroing(), x14, z0.VnD()), "ld1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3]");
- COMPARE_PREFIX(ld1d(z9.VnD(), p5.Zeroing(), x5, z21.VnD()), "ld1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ld1h(z24.VnD(), p4.Zeroing(), x6, z11.VnD()), "ld1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]");
- COMPARE_PREFIX(ld1h(z2.VnD(), p3.Zeroing(), x16, z18.VnD()), "ld1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ld1h(z21.VnD(), p5.Zeroing(), x13, z8.VnD()), "ld1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1]");
- COMPARE_PREFIX(ld1h(z26.VnD(), p3.Zeroing(), x1, z10.VnD()), "ld1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ld1sb(z11.VnD(), p3.Zeroing(), x24, z21.VnD()), "ld1sb { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ld1sb(z4.VnD(), p1.Zeroing(), x24, z15.VnD()), "ld1sb { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ld1sh(z22.VnD(), p6.Zeroing(), x7, z31.VnD()), "ld1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]");
- COMPARE_PREFIX(ld1sh(z7.VnD(), p7.Zeroing(), x28, z23.VnD()), "ld1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ld1sh(z29.VnD(), p0.Zeroing(), x9, z10.VnD()), "ld1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1]");
- COMPARE_PREFIX(ld1sh(z9.VnD(), p1.Zeroing(), x0, z12.VnD()), "ld1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ld1sw(z9.VnD(), p0.Zeroing(), x2, z27.VnD()), "ld1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]");
- COMPARE_PREFIX(ld1sw(z29.VnD(), p7.Zeroing(), x27, z4.VnD()), "ld1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ld1sw(z5.VnD(), p2.Zeroing(), x1, z23.VnD()), "ld1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]");
- COMPARE_PREFIX(ld1sw(z19.VnD(), p2.Zeroing(), x19, z16.VnD()), "ld1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ld1w(z9.VnD(), p2.Zeroing(), x0, z0.VnD()), "ld1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]");
- COMPARE_PREFIX(ld1w(z19.VnD(), p1.Zeroing(), x27, z4.VnD()), "ld1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ld1w(z21.VnD(), p1.Zeroing(), x7, z8.VnD()), "ld1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]");
- COMPARE_PREFIX(ld1w(z13.VnD(), p3.Zeroing(), x8, z10.VnD()), "ld1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
-#endif
+ COMPARE_PREFIX(ld1b(z30.VnD(), p6.Zeroing(), SVEMemOperand(sp, z24.VnD())),
+ "ld1b { z30.d }, p6/z, [sp, z24.d]");
+ COMPARE_PREFIX(ld1d(z18.VnD(), p5.Zeroing(), SVEMemOperand(x11, z11.VnD())),
+ "ld1d { z18.d }, p5/z, [x11, z11.d]");
+ COMPARE_PREFIX(ld1h(z2.VnD(), p3.Zeroing(), SVEMemOperand(x16, z18.VnD())),
+ "ld1h { z2.d }, p3/z, [x16, z18.d]");
+ COMPARE_PREFIX(ld1sb(z11.VnD(), p3.Zeroing(), SVEMemOperand(x24, z21.VnD())),
+ "ld1sb { z11.d }, p3/z, [x24, z21.d]");
+ COMPARE_PREFIX(ld1sh(z7.VnD(), p7.Zeroing(), SVEMemOperand(x28, z23.VnD())),
+ "ld1sh { z7.d }, p7/z, [x28, z23.d]");
+ COMPARE_PREFIX(ld1sw(z29.VnD(), p7.Zeroing(), SVEMemOperand(x27, z4.VnD())),
+ "ld1sw { z29.d }, p7/z, [x27, z4.d]");
+ COMPARE_PREFIX(ld1w(z19.VnD(), p1.Zeroing(), SVEMemOperand(x27, z4.VnD())),
+ "ld1w { z19.d }, p1/z, [x27, z4.d]");
+
+ COMPARE_PREFIX(ld1d(z20.VnD(),
+ p3.Zeroing(),
+ SVEMemOperand(x3, z15.VnD(), LSL, 3)),
+ "ld1d { z20.d }, p3/z, [x3, z15.d, lsl #3]");
+ COMPARE_PREFIX(ld1h(z24.VnD(),
+ p4.Zeroing(),
+ SVEMemOperand(x6, z11.VnD(), LSL, 1)),
+ "ld1h { z24.d }, p4/z, [x6, z11.d, lsl #1]");
+ COMPARE_PREFIX(ld1sh(z22.VnD(),
+ p6.Zeroing(),
+ SVEMemOperand(x7, z31.VnD(), LSL, 1)),
+ "ld1sh { z22.d }, p6/z, [x7, z31.d, lsl #1]");
+ COMPARE_PREFIX(ld1sw(z9.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x2, z27.VnD(), LSL, 2)),
+ "ld1sw { z9.d }, p0/z, [x2, z27.d, lsl #2]");
+ COMPARE_PREFIX(ld1w(z9.VnD(),
+ p2.Zeroing(),
+ SVEMemOperand(x0, z0.VnD(), LSL, 2)),
+ "ld1w { z9.d }, p2/z, [x0, z0.d, lsl #2]");
+
+ COMPARE_PREFIX(ld1b(z19.VnD(),
+ p5.Zeroing(),
+ SVEMemOperand(x21, z29.VnD(), UXTW)),
+ "ld1b { z19.d }, p5/z, [x21, z29.d, uxtw]");
+ COMPARE_PREFIX(ld1d(z9.VnD(),
+ p5.Zeroing(),
+ SVEMemOperand(x5, z21.VnD(), SXTW)),
+ "ld1d { z9.d }, p5/z, [x5, z21.d, sxtw]");
+ COMPARE_PREFIX(ld1h(z26.VnD(),
+ p3.Zeroing(),
+ SVEMemOperand(x1, z10.VnD(), UXTW)),
+ "ld1h { z26.d }, p3/z, [x1, z10.d, uxtw]");
+ COMPARE_PREFIX(ld1sb(z4.VnD(),
+ p1.Zeroing(),
+ SVEMemOperand(x24, z15.VnD(), SXTW)),
+ "ld1sb { z4.d }, p1/z, [x24, z15.d, sxtw]");
+ COMPARE_PREFIX(ld1sh(z9.VnD(),
+ p1.Zeroing(),
+ SVEMemOperand(x0, z12.VnD(), UXTW)),
+ "ld1sh { z9.d }, p1/z, [x0, z12.d, uxtw]");
+ COMPARE_PREFIX(ld1sw(z19.VnD(),
+ p2.Zeroing(),
+ SVEMemOperand(x19, z16.VnD(), SXTW)),
+ "ld1sw { z19.d }, p2/z, [x19, z16.d, sxtw]");
+ COMPARE_PREFIX(ld1w(z13.VnD(),
+ p3.Zeroing(),
+ SVEMemOperand(x8, z10.VnD(), UXTW)),
+ "ld1w { z13.d }, p3/z, [x8, z10.d, uxtw]");
+
+ COMPARE_PREFIX(ld1d(z25.VnD(),
+ p3.Zeroing(),
+ SVEMemOperand(x14, z0.VnD(), UXTW, 3)),
+ "ld1d { z25.d }, p3/z, [x14, z0.d, uxtw #3]");
+ COMPARE_PREFIX(ld1h(z21.VnD(),
+ p5.Zeroing(),
+ SVEMemOperand(x13, z8.VnD(), SXTW, 1)),
+ "ld1h { z21.d }, p5/z, [x13, z8.d, sxtw #1]");
+ COMPARE_PREFIX(ld1sh(z29.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x9, z10.VnD(), UXTW, 1)),
+ "ld1sh { z29.d }, p0/z, [x9, z10.d, uxtw #1]");
+ COMPARE_PREFIX(ld1sw(z5.VnD(),
+ p2.Zeroing(),
+ SVEMemOperand(x1, z23.VnD(), SXTW, 2)),
+ "ld1sw { z5.d }, p2/z, [x1, z23.d, sxtw #2]");
+ COMPARE_PREFIX(ld1w(z21.VnD(),
+ p1.Zeroing(),
+ SVEMemOperand(x7, z8.VnD(), UXTW, 2)),
+ "ld1w { z21.d }, p1/z, [x7, z8.d, uxtw #2]");
CLEANUP();
}
@@ -3750,7 +3809,7 @@ TEST(sve_mem_prefetch) {
CLEANUP();
}
-TEST(sve_mem_64bit_gather_scalar_plus_vector) {
+TEST(sve_mem_64bit_ff_gather_scalar_plus_vector) {
SETUP();
// 64-bit unscaled offset.