author     TatWai Chong <tatwai.chong@arm.com>    2020-06-01 20:54:06 -0700
committer  TatWai Chong <tatwai.chong@arm.com>    2020-06-01 22:47:12 -0700
commit     1af34f12ac11ab47651eee548ac4e570cef39c05 (patch)
tree       0773d9f608fede7e4033e3327707155efabc0292 /test
parent     6537a9a73eeaff4b2534da52d4151462030a234c (diff)
download   vixl-1af34f12ac11ab47651eee548ac4e570cef39c05.tar.gz
[sve] Implement first-fault gather load of data to 64-bit vectors (vector index).
Include ldff1b, ldff1h, ldff1w, ldff1d, ldff1sb, ldff1sh and ldff1sw.

Change-Id: I256831727dc9d83b6d13eb5b8d9c15b0192cc277
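These instructions gather-load elements from a scalar base plus a vector of 64-bit indexes, stopping at the first faulting element; the first-fault register (FFR) records which lanes completed. As a minimal sketch of the pattern the new tests exercise (register choices here are illustrative, not taken from the patch):

  // First-fault gather: load doublewords from x19 + (z17.d << 3), one per
  // active lane of p0. A fault on any element after the first is suppressed;
  // the FFR clears the bits for lanes that did not load.
  __ Ldff1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x19, z17.VnD(), LSL, 3));
  // Read the FFR (setting condition flags) to see which lanes succeeded.
  __ Rdffrs(p1.VnB(), p0.Zeroing());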
Diffstat (limited to 'test')
-rw-r--r--  test/aarch64/test-assembler-sve-aarch64.cc  376
-rw-r--r--  test/aarch64/test-disasm-sve-aarch64.cc      124
2 files changed, 413 insertions, 87 deletions
diff --git a/test/aarch64/test-assembler-sve-aarch64.cc b/test/aarch64/test-assembler-sve-aarch64.cc
index 5e93752d..ede7b7a1 100644
--- a/test/aarch64/test-assembler-sve-aarch64.cc
+++ b/test/aarch64/test-assembler-sve-aarch64.cc
@@ -32,6 +32,7 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
+#include <functional>
#include "test-runner.h"
#include "test-utils.h"
@@ -8832,14 +8833,14 @@ typedef void (MacroAssembler::*Ld1Macro)(const ZRegister& zt,
const PRegisterZ& pg,
const SVEMemOperand& addr);
-template <typename T>
static void Ldff1Helper(Test* config,
uintptr_t data,
unsigned msize_in_bits,
unsigned esize_in_bits,
+ CPURegister::RegisterType base_type,
Ld1Macro ldff1,
Ld1Macro ld1,
- T mod,
+ SVEOffsetModifier mod,
bool scale = false) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
START();
@@ -8889,16 +8890,36 @@ static void Ldff1Helper(Test* config,
__ Mov(x20, data);
__ Mov(x21, offset);
- if (std::is_same<T, vixl::aarch64::Shift>::value) {
- VIXL_ASSERT(scale == false);
- // Scalar plus scalar mode.
- (masm.*ldff1)(z0.WithLaneSize(esize_in_bits),
- all.Zeroing(),
- SVEMemOperand(x20, x21, mod, msize_in_bytes_log2));
+ if (base_type == CPURegister::kRegister) {
+ // Scalar-plus-scalar mode.
+ if ((mod == SVE_LSL) || (mod == NO_SVE_OFFSET_MODIFIER)) {
+ (masm.*ldff1)(z0.WithLaneSize(esize_in_bits),
+ all.Zeroing(),
+ SVEMemOperand(x20, x21, mod, msize_in_bytes_log2));
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+
+ } else if (base_type == CPURegister::kZRegister) {
+ int offs_size;
+ bool offs_is_unsigned;
+ if ((mod == SVE_SXTW) || (mod == SVE_UXTW)) {
+      // Scalar-plus-vector mode with a 32-bit offset, optionally unpacked,
+      // and unscaled or scaled.
+ if (scale == true) {
+        // First-fault gather byte loads don't support a scaled offset.
+ VIXL_ASSERT(msize_in_bits != kBRegSize);
+ }
+ offs_is_unsigned = (mod == SVE_UXTW) ? true : false;
+ offs_size = kSRegSize;
+
+ } else {
+ // Scalar-plus-vector mode with 64-bit unscaled or scaled offset.
+ VIXL_ASSERT((mod == SVE_LSL) || (mod == NO_SVE_OFFSET_MODIFIER));
+ offs_is_unsigned = false;
+ offs_size = kDRegSize;
+ }
- } else if (std::is_same<T, vixl::aarch64::Extend>::value) {
- VIXL_ASSERT((static_cast<int>(mod) == UXTW) ||
- (static_cast<int>(mod) == SXTW));
// For generating the pattern of "base address + index << shift".
// In the unscaled-offset case, use `msize_in_bytes` as the offset between
// successive decreasing memory accesses; otherwise, decrease the indexes by 1
@@ -8906,12 +8927,10 @@ static void Ldff1Helper(Test* config,
int shift = (scale == true) ? msize_in_bytes_log2 : 0;
int index_offset = msize_in_bytes >> shift;
VIXL_ASSERT(index_offset > 0);
- // TODO `offs_size` can be different once 64-bit offset is supported.
- int offs_size = kSRegSize;
uint64_t index = 0;
uint64_t base_address = 0;
- if (static_cast<int>(mod) == UXTW) {
+ if (offs_is_unsigned == true) {
// Base address.
base_address = data;
// Maximum unsigned positive index.
@@ -8942,6 +8961,8 @@ static void Ldff1Helper(Test* config,
ldff1)(z0.WithLaneSize(esize_in_bits),
all.Zeroing(),
SVEMemOperand(x19, z17.WithLaneSize(esize_in_bits), mod, shift));
+ } else {
+ VIXL_UNIMPLEMENTED();
}
__ Rdffrs(p0.VnB(), all.Zeroing());
@@ -9020,44 +9041,299 @@ TEST_SVE(sve_ldff1_scalar_plus_scalar) {
memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
}
+ auto ldff1_unscaled_offset_helper = std::bind(&Ldff1Helper,
+ config,
+ data,
+ std::placeholders::_1,
+ std::placeholders::_2,
+ CPURegister::kRegister,
+ std::placeholders::_3,
+ std::placeholders::_4,
+ NO_SVE_OFFSET_MODIFIER,
+ false);
+
Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
Ld1Macro ld1b = &MacroAssembler::Ld1b;
- Ldff1Helper(config, data, kBRegSize, kBRegSize, ldff1b, ld1b, LSL);
- Ldff1Helper(config, data, kBRegSize, kHRegSize, ldff1b, ld1b, LSL);
- Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1b, ld1b, LSL);
- Ldff1Helper(config, data, kBRegSize, kDRegSize, ldff1b, ld1b, LSL);
+ ldff1_unscaled_offset_helper(kBRegSize, kBRegSize, ldff1b, ld1b);
+ ldff1_unscaled_offset_helper(kBRegSize, kHRegSize, ldff1b, ld1b);
+ ldff1_unscaled_offset_helper(kBRegSize, kSRegSize, ldff1b, ld1b);
+ ldff1_unscaled_offset_helper(kBRegSize, kDRegSize, ldff1b, ld1b);
+
+ Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
+ Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
+ ldff1_unscaled_offset_helper(kBRegSize, kHRegSize, ldff1sb, ld1sb);
+ ldff1_unscaled_offset_helper(kBRegSize, kSRegSize, ldff1sb, ld1sb);
+ ldff1_unscaled_offset_helper(kBRegSize, kDRegSize, ldff1sb, ld1sb);
+
+ auto ldff1_scaled_offset_helper = std::bind(&Ldff1Helper,
+ config,
+ data,
+ std::placeholders::_1,
+ std::placeholders::_2,
+ CPURegister::kRegister,
+ std::placeholders::_3,
+ std::placeholders::_4,
+ SVE_LSL,
+ true);
Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
Ld1Macro ld1h = &MacroAssembler::Ld1h;
- Ldff1Helper(config, data, kHRegSize, kHRegSize, ldff1h, ld1h, LSL);
- Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1h, ld1h, LSL);
- Ldff1Helper(config, data, kHRegSize, kDRegSize, ldff1h, ld1h, LSL);
+ ldff1_scaled_offset_helper(kHRegSize, kHRegSize, ldff1h, ld1h);
+ ldff1_scaled_offset_helper(kHRegSize, kSRegSize, ldff1h, ld1h);
+ ldff1_scaled_offset_helper(kHRegSize, kDRegSize, ldff1h, ld1h);
Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
Ld1Macro ld1w = &MacroAssembler::Ld1w;
- Ldff1Helper(config, data, kSRegSize, kSRegSize, ldff1w, ld1w, LSL);
- Ldff1Helper(config, data, kSRegSize, kDRegSize, ldff1w, ld1w, LSL);
+ ldff1_scaled_offset_helper(kSRegSize, kSRegSize, ldff1w, ld1w);
+ ldff1_scaled_offset_helper(kSRegSize, kDRegSize, ldff1w, ld1w);
Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
Ld1Macro ld1d = &MacroAssembler::Ld1d;
- Ldff1Helper(config, data, kDRegSize, kDRegSize, ldff1d, ld1d, LSL);
+ ldff1_scaled_offset_helper(kDRegSize, kDRegSize, ldff1d, ld1d);
+
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ ldff1_scaled_offset_helper(kHRegSize, kSRegSize, ldff1sh, ld1sh);
+ ldff1_scaled_offset_helper(kHRegSize, kDRegSize, ldff1sh, ld1sh);
+
+ Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
+ Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
+ ldff1_scaled_offset_helper(kSRegSize, kDRegSize, ldff1sw, ld1sw);
+
+ munmap(reinterpret_cast<void*>(data), page_size * 2);
+}
+
+static void sve_ldff1_scalar_plus_vector_32_scaled_offset(Test* config,
+ uintptr_t data) {
+ auto ldff1_32_scaled_offset_helper = std::bind(&Ldff1Helper,
+ config,
+ data,
+ std::placeholders::_1,
+ kSRegSize,
+ CPURegister::kZRegister,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ std::placeholders::_4,
+ true);
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ ldff1_32_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
+ ldff1_32_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
+
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ ldff1_32_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
+ ldff1_32_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
+
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ ldff1_32_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
+ ldff1_32_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
+}
+
+static void sve_ldff1_scalar_plus_vector_32_unscaled_offset(Test* config,
+ uintptr_t data) {
+ auto ldff1_32_unscaled_offset_helper = std::bind(&Ldff1Helper,
+ config,
+ data,
+ std::placeholders::_1,
+ kSRegSize,
+ CPURegister::kZRegister,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ std::placeholders::_4,
+ false);
+
+ Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
+ Ld1Macro ld1b = &MacroAssembler::Ld1b;
+ ldff1_32_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_UXTW);
+ ldff1_32_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_SXTW);
+
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ ldff1_32_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
+ ldff1_32_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
+
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ ldff1_32_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
+ ldff1_32_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
- Ldff1Helper(config, data, kBRegSize, kHRegSize, ldff1sb, ld1sb, LSL);
- Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1sb, ld1sb, LSL);
- Ldff1Helper(config, data, kBRegSize, kDRegSize, ldff1sb, ld1sb, LSL);
+ ldff1_32_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_UXTW);
+ ldff1_32_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_SXTW);
Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
- Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1sh, ld1sh, LSL);
- Ldff1Helper(config, data, kHRegSize, kDRegSize, ldff1sh, ld1sh, LSL);
+ ldff1_32_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
+ ldff1_32_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
+}
+
+static void sve_ldff1_scalar_plus_vector_32_unpacked_scaled_offset(
+ Test* config, uintptr_t data) {
+ auto ldff1_32_unpacked_scaled_offset_helper =
+ std::bind(&Ldff1Helper,
+ config,
+ data,
+ std::placeholders::_1,
+ kDRegSize,
+ CPURegister::kZRegister,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ std::placeholders::_4,
+ true);
+
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
+
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
+
+ Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
+ Ld1Macro ld1d = &MacroAssembler::Ld1d;
+ ldff1_32_unpacked_scaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_UXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_SXTW);
+
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
- Ldff1Helper(config, data, kSRegSize, kDRegSize, ldff1sw, ld1sw, LSL);
+ ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_UXTW);
+ ldff1_32_unpacked_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_SXTW);
+}
+
+static void sve_ldff1_scalar_plus_vector_32_unpacked_unscaled_offset(
+ Test* config, uintptr_t data) {
+ auto ldff1_32_unpacked_unscaled_offset_helper =
+ std::bind(&Ldff1Helper,
+ config,
+ data,
+ std::placeholders::_1,
+ kDRegSize,
+ CPURegister::kZRegister,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ std::placeholders::_4,
+ false);
- munmap(reinterpret_cast<void*>(data), page_size * 2);
+ Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
+ Ld1Macro ld1b = &MacroAssembler::Ld1b;
+ ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1b, ld1b, SVE_SXTW);
+
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1h, ld1h, SVE_SXTW);
+
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1w, ld1w, SVE_SXTW);
+
+ Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
+ Ld1Macro ld1d = &MacroAssembler::Ld1d;
+ ldff1_32_unpacked_unscaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kDRegSize, ldff1d, ld1d, SVE_SXTW);
+
+ Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
+ Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
+ ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb, SVE_SXTW);
+
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh, SVE_SXTW);
+
+ Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
+ Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
+ ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_UXTW);
+ ldff1_32_unpacked_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw, SVE_SXTW);
+}
+
+static void sve_ldff1_scalar_plus_vector_64_scaled_offset(Test* config,
+ uintptr_t data) {
+ auto ldff1_64_scaled_offset_helper = std::bind(&Ldff1Helper,
+ config,
+ data,
+ std::placeholders::_1,
+ kDRegSize,
+ CPURegister::kZRegister,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ SVE_LSL,
+ true);
+
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ ldff1_64_scaled_offset_helper(kHRegSize, ldff1h, ld1h);
+
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ ldff1_64_scaled_offset_helper(kSRegSize, ldff1w, ld1w);
+
+ Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
+ Ld1Macro ld1d = &MacroAssembler::Ld1d;
+ ldff1_64_scaled_offset_helper(kDRegSize, ldff1d, ld1d);
+
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ ldff1_64_scaled_offset_helper(kHRegSize, ldff1sh, ld1sh);
+
+ Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
+ Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
+ ldff1_64_scaled_offset_helper(kSRegSize, ldff1sw, ld1sw);
+}
+
+static void sve_ldff1_scalar_plus_vector_64_unscaled_offset(Test* config,
+ uintptr_t data) {
+ auto ldff1_64_unscaled_offset_helper = std::bind(&Ldff1Helper,
+ config,
+ data,
+ std::placeholders::_1,
+ kDRegSize,
+ CPURegister::kZRegister,
+ std::placeholders::_2,
+ std::placeholders::_3,
+ NO_SVE_OFFSET_MODIFIER,
+ false);
+
+ Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
+ Ld1Macro ld1b = &MacroAssembler::Ld1b;
+ ldff1_64_unscaled_offset_helper(kBRegSize, ldff1b, ld1b);
+
+ Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
+ Ld1Macro ld1h = &MacroAssembler::Ld1h;
+ ldff1_64_unscaled_offset_helper(kHRegSize, ldff1h, ld1h);
+
+ Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
+ Ld1Macro ld1w = &MacroAssembler::Ld1w;
+ ldff1_64_unscaled_offset_helper(kSRegSize, ldff1w, ld1w);
+
+ Ld1Macro ldff1d = &MacroAssembler::Ldff1d;
+ Ld1Macro ld1d = &MacroAssembler::Ld1d;
+ ldff1_64_unscaled_offset_helper(kDRegSize, ldff1d, ld1d);
+
+ Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
+ Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
+ ldff1_64_unscaled_offset_helper(kBRegSize, ldff1sb, ld1sb);
+
+ Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
+ Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
+ ldff1_64_unscaled_offset_helper(kHRegSize, ldff1sh, ld1sh);
+
+ Ld1Macro ldff1sw = &MacroAssembler::Ldff1sw;
+ Ld1Macro ld1sw = &MacroAssembler::Ld1sw;
+ ldff1_64_unscaled_offset_helper(kSRegSize, ldff1sw, ld1sw);
}
TEST_SVE(sve_ldff1_scalar_plus_vector) {
@@ -9080,38 +9356,12 @@ TEST_SVE(sve_ldff1_scalar_plus_vector) {
memcpy(reinterpret_cast<void*>(data + i), &byte, 1);
}
- Ld1Macro ldff1b = &MacroAssembler::Ldff1b;
- Ld1Macro ld1b = &MacroAssembler::Ld1b;
- Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1b, ld1b, UXTW);
- Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1b, ld1b, SXTW);
-
- Ld1Macro ldff1h = &MacroAssembler::Ldff1h;
- Ld1Macro ld1h = &MacroAssembler::Ld1h;
- Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1h, ld1h, UXTW);
- Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1h, ld1h, SXTW);
- Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1h, ld1h, UXTW, true);
- Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1h, ld1h, SXTW, true);
-
- Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
- Ld1Macro ld1w = &MacroAssembler::Ld1w;
- Ldff1Helper(config, data, kSRegSize, kSRegSize, ldff1w, ld1w, UXTW);
- Ldff1Helper(config, data, kSRegSize, kSRegSize, ldff1w, ld1w, SXTW);
- Ldff1Helper(config, data, kSRegSize, kSRegSize, ldff1w, ld1w, UXTW, true);
- Ldff1Helper(config, data, kSRegSize, kSRegSize, ldff1w, ld1w, SXTW, true);
-
- Ld1Macro ldff1sb = &MacroAssembler::Ldff1sb;
- Ld1Macro ld1sb = &MacroAssembler::Ld1sb;
- Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1sb, ld1sb, UXTW);
- Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1sb, ld1sb, SXTW);
- Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1sb, ld1sb, UXTW, true);
- Ldff1Helper(config, data, kBRegSize, kSRegSize, ldff1sb, ld1sb, SXTW, true);
-
- Ld1Macro ldff1sh = &MacroAssembler::Ldff1sh;
- Ld1Macro ld1sh = &MacroAssembler::Ld1sh;
- Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1sh, ld1sh, UXTW);
- Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1sh, ld1sh, SXTW);
- Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1sh, ld1sh, UXTW, true);
- Ldff1Helper(config, data, kHRegSize, kSRegSize, ldff1sh, ld1sh, UXTW, true);
+ sve_ldff1_scalar_plus_vector_32_scaled_offset(config, data);
+ sve_ldff1_scalar_plus_vector_32_unscaled_offset(config, data);
+ sve_ldff1_scalar_plus_vector_32_unpacked_scaled_offset(config, data);
+ sve_ldff1_scalar_plus_vector_32_unpacked_unscaled_offset(config, data);
+ sve_ldff1_scalar_plus_vector_64_scaled_offset(config, data);
+ sve_ldff1_scalar_plus_vector_64_unscaled_offset(config, data);
munmap(reinterpret_cast<void*>(data), page_size * 2);
}
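The refactored Ldff1Helper drops its template parameter in favour of an explicit base-register type and an SVEOffsetModifier, so a single helper covers both scalar-plus-scalar and scalar-plus-vector addressing. A direct call, sketched under the same fixture (`config` and `data` as set up above), would look like:

  // Hypothetical direct call: first-fault load of 32-bit words into .D
  // lanes via unpacked unsigned 32-bit offsets, scaled by the element size.
  Ld1Macro ldff1w = &MacroAssembler::Ldff1w;
  Ld1Macro ld1w = &MacroAssembler::Ld1w;
  Ldff1Helper(config,
              data,
              kSRegSize,                // msize: 32-bit memory accesses
              kDRegSize,                // esize: 64-bit vector lanes
              CPURegister::kZRegister,  // vector (gather) addressing
              ldff1w,
              ld1w,
              SVE_UXTW,                 // unsigned 32-bit offset extension
              true);                    // scale the offset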
diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc
index 115a6c34..037cb67d 100644
--- a/test/aarch64/test-disasm-sve-aarch64.cc
+++ b/test/aarch64/test-disasm-sve-aarch64.cc
@@ -3589,30 +3589,6 @@ TEST(sve_mem_64bit_gather) {
COMPARE_PREFIX(ld1w(z19.VnD(), p1.Zeroing(), x27, z4.VnD()), "ld1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
COMPARE_PREFIX(ld1w(z21.VnD(), p1.Zeroing(), x7, z8.VnD()), "ld1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]");
COMPARE_PREFIX(ld1w(z13.VnD(), p3.Zeroing(), x8, z10.VnD()), "ld1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ldff1b(z20.VnD(), p3.Zeroing(), x26, z11.VnD()), "ldff1b { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ldff1b(z8.VnD(), p6.Zeroing(), x18, z29.VnD()), "ldff1b { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ldff1d(z7.VnD(), p7.Zeroing(), x24, z10.VnD()), "ldff1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3]");
- COMPARE_PREFIX(ldff1d(z11.VnD(), p5.Zeroing(), x10, z21.VnD()), "ldff1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ldff1d(z25.VnD(), p0.Zeroing(), x21, z15.VnD()), "ldff1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3]");
- COMPARE_PREFIX(ldff1d(z15.VnD(), p7.Zeroing(), x26, z9.VnD()), "ldff1d { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ldff1h(z13.VnD(), p7.Zeroing(), x17, z17.VnD()), "ldff1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]");
- COMPARE_PREFIX(ldff1h(z26.VnD(), p7.Zeroing(), x26, z31.VnD()), "ldff1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ldff1h(z19.VnD(), p6.Zeroing(), x16, z12.VnD()), "ldff1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1]");
- COMPARE_PREFIX(ldff1h(z16.VnD(), p1.Zeroing(), x20, z10.VnD()), "ldff1h { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ldff1sb(z3.VnD(), p5.Zeroing(), x18, z3.VnD()), "ldff1sb { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ldff1sb(z31.VnD(), p3.Zeroing(), x13, z0.VnD()), "ldff1sb { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ldff1sh(z2.VnD(), p1.Zeroing(), x8, z8.VnD()), "ldff1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]");
- COMPARE_PREFIX(ldff1sh(z4.VnD(), p4.Zeroing(), x17, z18.VnD()), "ldff1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ldff1sh(z3.VnD(), p0.Zeroing(), x13, z22.VnD()), "ldff1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1]");
- COMPARE_PREFIX(ldff1sh(z7.VnD(), p3.Zeroing(), x16, z20.VnD()), "ldff1sh { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ldff1sw(z29.VnD(), p0.Zeroing(), x5, z1.VnD()), "ldff1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]");
- COMPARE_PREFIX(ldff1sw(z26.VnD(), p3.Zeroing(), x0, z31.VnD()), "ldff1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ldff1sw(z25.VnD(), p5.Zeroing(), x4, z17.VnD()), "ldff1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]");
- COMPARE_PREFIX(ldff1sw(z7.VnD(), p6.Zeroing(), x12, z16.VnD()), "ldff1sw { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
- COMPARE_PREFIX(ldff1w(z28.VnD(), p5.Zeroing(), x25, z28.VnD()), "ldff1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]");
- COMPARE_PREFIX(ldff1w(z20.VnD(), p4.Zeroing(), x30, z7.VnD()), "ldff1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]");
- COMPARE_PREFIX(ldff1w(z10.VnD(), p2.Zeroing(), x23, z25.VnD()), "ldff1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2]");
- COMPARE_PREFIX(ldff1w(z4.VnD(), p1.Zeroing(), x8, z1.VnD()), "ldff1w { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>]");
#endif
CLEANUP();
@@ -3719,6 +3695,106 @@ TEST(sve_mem_prefetch) {
CLEANUP();
}
+TEST(sve_mem_64bit_gather_scalar_plus_vector) {
+ SETUP();
+
+ // 64-bit unscaled offset.
+ COMPARE_PREFIX(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(x27, z24.VnD())),
+ "ldff1b { z18.d }, p6/z, [x27, z24.d]");
+ COMPARE_PREFIX(ldff1h(z28.VnD(), p6.Zeroing(), SVEMemOperand(x1, z30.VnD())),
+ "ldff1h { z28.d }, p6/z, [x1, z30.d]");
+ COMPARE_PREFIX(ldff1w(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())),
+ "ldff1w { z12.d }, p3/z, [x25, z27.d]");
+ COMPARE_PREFIX(ldff1d(z23.VnD(), p5.Zeroing(), SVEMemOperand(x29, z31.VnD())),
+ "ldff1d { z23.d }, p5/z, [x29, z31.d]");
+ COMPARE_PREFIX(ldff1sb(z15.VnD(), p5.Zeroing(), SVEMemOperand(x5, z14.VnD())),
+ "ldff1sb { z15.d }, p5/z, [x5, z14.d]");
+ COMPARE_PREFIX(ldff1sh(z18.VnD(),
+ p4.Zeroing(),
+ SVEMemOperand(x25, z25.VnD())),
+ "ldff1sh { z18.d }, p4/z, [x25, z25.d]");
+ COMPARE_PREFIX(ldff1sw(z12.VnD(),
+ p3.Zeroing(),
+ SVEMemOperand(x25, z27.VnD())),
+ "ldff1sw { z12.d }, p3/z, [x25, z27.d]");
+
+ // 64-bit scaled offset.
+ COMPARE_PREFIX(ldff1h(z25.VnD(),
+ p3.Zeroing(),
+ SVEMemOperand(x17, z15.VnD(), LSL, 1)),
+ "ldff1h { z25.d }, p3/z, [x17, z15.d, lsl #1]");
+ COMPARE_PREFIX(ldff1w(z5.VnD(),
+ p4.Zeroing(),
+ SVEMemOperand(x23, z31.VnD(), LSL, 2)),
+ "ldff1w { z5.d }, p4/z, [x23, z31.d, lsl #2]");
+ COMPARE_PREFIX(ldff1d(z2.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(sp, z7.VnD(), LSL, 3)),
+ "ldff1d { z2.d }, p0/z, [sp, z7.d, lsl #3]");
+ COMPARE_PREFIX(ldff1sh(z10.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x19, z15.VnD(), LSL, 1)),
+ "ldff1sh { z10.d }, p0/z, [x19, z15.d, lsl #1]");
+ COMPARE_PREFIX(ldff1sw(z5.VnD(),
+ p4.Zeroing(),
+ SVEMemOperand(x23, z31.VnD(), LSL, 2)),
+ "ldff1sw { z5.d }, p4/z, [x23, z31.d, lsl #2]");
+
+  // 32-bit unpacked unscaled offset.
+ COMPARE_PREFIX(ldff1b(z18.VnD(),
+ p6.Zeroing(),
+ SVEMemOperand(sp, z24.VnD(), UXTW)),
+ "ldff1b { z18.d }, p6/z, [sp, z24.d, uxtw]");
+ COMPARE_PREFIX(ldff1h(z20.VnD(),
+ p5.Zeroing(),
+ SVEMemOperand(x7, z14.VnD(), SXTW)),
+ "ldff1h { z20.d }, p5/z, [x7, z14.d, sxtw]");
+ COMPARE_PREFIX(ldff1w(z22.VnD(),
+ p4.Zeroing(),
+ SVEMemOperand(x17, z4.VnD(), UXTW)),
+ "ldff1w { z22.d }, p4/z, [x17, z4.d, uxtw]");
+ COMPARE_PREFIX(ldff1d(z24.VnD(),
+ p3.Zeroing(),
+ SVEMemOperand(x3, z24.VnD(), SXTW)),
+ "ldff1d { z24.d }, p3/z, [x3, z24.d, sxtw]");
+ COMPARE_PREFIX(ldff1sb(z26.VnD(),
+ p2.Zeroing(),
+ SVEMemOperand(x13, z14.VnD(), UXTW)),
+ "ldff1sb { z26.d }, p2/z, [x13, z14.d, uxtw]");
+ COMPARE_PREFIX(ldff1sh(z28.VnD(),
+ p1.Zeroing(),
+ SVEMemOperand(x23, z4.VnD(), SXTW)),
+ "ldff1sh { z28.d }, p1/z, [x23, z4.d, sxtw]");
+ COMPARE_PREFIX(ldff1sw(z30.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x8, z24.VnD(), UXTW)),
+ "ldff1sw { z30.d }, p0/z, [x8, z24.d, uxtw]");
+
+  // 32-bit unpacked scaled offset.
+ COMPARE_PREFIX(ldff1h(z4.VnD(),
+ p5.Zeroing(),
+ SVEMemOperand(x7, z1.VnD(), SXTW, 1)),
+ "ldff1h { z4.d }, p5/z, [x7, z1.d, sxtw #1]");
+ COMPARE_PREFIX(ldff1w(z5.VnD(),
+ p4.Zeroing(),
+ SVEMemOperand(x17, z11.VnD(), UXTW, 2)),
+ "ldff1w { z5.d }, p4/z, [x17, z11.d, uxtw #2]");
+ COMPARE_PREFIX(ldff1d(z6.VnD(),
+ p3.Zeroing(),
+ SVEMemOperand(x3, z31.VnD(), SXTW, 3)),
+ "ldff1d { z6.d }, p3/z, [x3, z31.d, sxtw #3]");
+ COMPARE_PREFIX(ldff1sh(z7.VnD(),
+ p1.Zeroing(),
+ SVEMemOperand(x23, z7.VnD(), UXTW, 1)),
+ "ldff1sh { z7.d }, p1/z, [x23, z7.d, uxtw #1]");
+ COMPARE_PREFIX(ldff1sw(z8.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x8, z17.VnD(), SXTW, 2)),
+ "ldff1sw { z8.d }, p0/z, [x8, z17.d, sxtw #2]");
+
+ CLEANUP();
+}
+
TEST(sve_ld2_scalar_plus_immediate) {
SETUP();