author     Artem Serov <artem.serov@linaro.org>  2020-11-09 15:26:22 +0000
committer  Artem Serov <artem.serov@linaro.org>  2020-11-10 15:33:15 +0000
commit     5a229a9923d9dab968d7fe186ffa47ac52f9e065 (patch)
tree       ca689c0233a213244a288044dbb6cfc86d99be60 /src
parent     aa1d76b1824ec9bcf66af54fbdc9d137a3c398d5 (diff)
download   vixl-5a229a9923d9dab968d7fe186ffa47ac52f9e065.tar.gz
Revert^2 "Merge remote-tracking branch 'aosp/upstream-master' into..."
This reverts commit 0a75ba66aa15ea1cdb3f57d0efd4ce7e7c14d45f.

Test: mma test-art-host-vixl
Test: test.py --host --optimizing --jit --gtest
Test: test.py --target --optimizing --jit
Test: run-gtests.sh
Change-Id: I052ab4d3243b0b9bee4c52d00ba4ef1d93a8d32c
Diffstat (limited to 'src')
-rw-r--r--  src/aarch32/disasm-aarch32.cc  12
-rw-r--r--  src/aarch32/macro-assembler-aarch32.h  9
-rw-r--r--  src/aarch32/operands-aarch32.h  60
-rw-r--r--  src/aarch64/assembler-aarch64.cc  402
-rw-r--r--  src/aarch64/assembler-aarch64.h  2651
-rw-r--r--  src/aarch64/assembler-sve-aarch64.cc  6489
-rw-r--r--  src/aarch64/constants-aarch64.h  1747
-rw-r--r--  src/aarch64/cpu-aarch64.cc  215
-rw-r--r--  src/aarch64/cpu-aarch64.h  61
-rw-r--r--  src/aarch64/cpu-features-auditor-aarch64.cc  160
-rw-r--r--  src/aarch64/decoder-aarch64.cc  63
-rw-r--r--  src/aarch64/decoder-aarch64.h  345
-rw-r--r--  src/aarch64/decoder-constants-aarch64.h  737
-rw-r--r--  src/aarch64/disasm-aarch64.cc  5275
-rw-r--r--  src/aarch64/disasm-aarch64.h  15
-rw-r--r--  src/aarch64/instructions-aarch64.cc  725
-rw-r--r--  src/aarch64/instructions-aarch64.h  150
-rw-r--r--  src/aarch64/instrument-aarch64.cc  975
-rw-r--r--  src/aarch64/instrument-aarch64.h  117
-rw-r--r--  src/aarch64/logic-aarch64.cc  2609
-rw-r--r--  src/aarch64/macro-assembler-aarch64.cc  248
-rw-r--r--  src/aarch64/macro-assembler-aarch64.h  3400
-rw-r--r--  src/aarch64/macro-assembler-sve-aarch64.cc  2027
-rw-r--r--  src/aarch64/operands-aarch64.cc  252
-rw-r--r--  src/aarch64/operands-aarch64.h  1025
-rw-r--r--  src/aarch64/registers-aarch64.cc  321
-rw-r--r--  src/aarch64/registers-aarch64.h  900
-rw-r--r--  src/aarch64/simulator-aarch64.cc  6138
-rw-r--r--  src/aarch64/simulator-aarch64.h  1641
-rw-r--r--  src/aarch64/simulator-constants-aarch64.h  2
-rw-r--r--  src/cpu-features.cc  112
-rw-r--r--  src/cpu-features.h  195
-rw-r--r--  src/globals-vixl.h  10
-rw-r--r--  src/invalset-vixl.h  4
-rw-r--r--  src/pool-manager-impl.h  10
-rw-r--r--  src/utils-vixl.h  161
36 files changed, 35236 insertions, 4027 deletions
diff --git a/src/aarch32/disasm-aarch32.cc b/src/aarch32/disasm-aarch32.cc
index 9ed3a831..535f60c8 100644
--- a/src/aarch32/disasm-aarch32.cc
+++ b/src/aarch32/disasm-aarch32.cc
@@ -8288,13 +8288,13 @@ void Disassembler::DecodeT32(uint32_t instr) {
UnallocatedT32(instr);
return;
}
- unsigned firstcond = (instr >> 20) & 0xf;
+ unsigned first_cond = (instr >> 20) & 0xf;
unsigned mask = (instr >> 16) & 0xf;
- bool wasInITBlock = InITBlock();
- SetIT(Condition(firstcond), mask);
- it(Condition(firstcond), mask);
- if (wasInITBlock || (firstcond == 15) ||
- ((firstcond == al) &&
+ bool was_in_it_block = InITBlock();
+ SetIT(Condition(first_cond), mask);
+ it(Condition(first_cond), mask);
+ if (was_in_it_block || (first_cond == 15) ||
+ ((first_cond == al) &&
(BitCount(Uint32(mask)) != 1))) {
UnpredictableT32(instr);
}
diff --git a/src/aarch32/macro-assembler-aarch32.h b/src/aarch32/macro-assembler-aarch32.h
index d0ff52b3..6d76642f 100644
--- a/src/aarch32/macro-assembler-aarch32.h
+++ b/src/aarch32/macro-assembler-aarch32.h
@@ -268,7 +268,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
pool_end_(NULL) {
#ifdef VIXL_DEBUG
- SetAllowMacroInstructions(true);
+ SetAllowMacroInstructions( // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+ true);
#else
USE(allow_macro_instructions_);
#endif
@@ -283,7 +284,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
pool_end_(NULL) {
#ifdef VIXL_DEBUG
- SetAllowMacroInstructions(true);
+ SetAllowMacroInstructions( // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+ true);
#endif
}
MacroAssembler(byte* buffer, size_t size, InstructionSet isa = kDefaultISA)
@@ -296,7 +298,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
pool_end_(NULL) {
#ifdef VIXL_DEBUG
- SetAllowMacroInstructions(true);
+ SetAllowMacroInstructions( // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+ true);
#endif
}
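Note: the NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) annotations above suppress clang-analyzer's warning about calling a virtual function from a constructor. A minimal sketch of what that check flags, using hypothetical classes rather than VIXL code (the derived override is never reached because the derived subobject is not constructed yet):

    #include <iostream>

    struct Base {
      Base() {
        // Virtual dispatch from a constructor resolves to Base::Hook(), not to
        // an override: the Derived part does not exist yet. This is what
        // optin.cplusplus.VirtualCall warns about.
        Hook();
      }
      virtual ~Base() = default;
      virtual void Hook() { std::cout << "Base::Hook\n"; }
    };

    struct Derived : Base {
      void Hook() override { std::cout << "Derived::Hook\n"; }
    };

    int main() {
      Derived d;  // Prints "Base::Hook".
      return 0;
    }

In the MacroAssembler constructors the call is deliberate, so the diff silences the warning rather than restructuring the call.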
diff --git a/src/aarch32/operands-aarch32.h b/src/aarch32/operands-aarch32.h
index 1d18bfd3..2b452958 100644
--- a/src/aarch32/operands-aarch32.h
+++ b/src/aarch32/operands-aarch32.h
@@ -54,28 +54,16 @@ class Operand {
// This is allowed to be an implicit constructor because Operand is
// a wrapper class that doesn't normally perform any type conversion.
Operand(uint32_t immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- rm_(NoReg),
- shift_(LSL),
- amount_(0),
- rs_(NoReg) {}
+ : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {}
Operand(int32_t immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- rm_(NoReg),
- shift_(LSL),
- amount_(0),
- rs_(NoReg) {}
+ : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {}
// rm
// where rm is the base register
// This is allowed to be an implicit constructor because Operand is
// a wrapper class that doesn't normally perform any type conversion.
Operand(Register rm) // NOLINT(runtime/explicit)
- : imm_(0),
- rm_(rm),
- shift_(LSL),
- amount_(0),
- rs_(NoReg) {
+ : imm_(0), rm_(rm), shift_(LSL), amount_(0), rs_(NoReg) {
VIXL_ASSERT(rm_.IsValid());
}
@@ -245,22 +233,18 @@ class NeonImmediate {
// This is allowed to be an implicit constructor because NeonImmediate is
// a wrapper class that doesn't normally perform any type conversion.
NeonImmediate(uint32_t immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- immediate_type_(I32) {}
+ : imm_(immediate), immediate_type_(I32) {}
NeonImmediate(int immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- immediate_type_(I32) {}
+ : imm_(immediate), immediate_type_(I32) {}
// { #<immediate> }
// where <immediate> is a 64 bit number
// This is allowed to be an implicit constructor because NeonImmediate is
// a wrapper class that doesn't normally perform any type conversion.
NeonImmediate(int64_t immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- immediate_type_(I64) {}
+ : imm_(immediate), immediate_type_(I64) {}
NeonImmediate(uint64_t immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- immediate_type_(I64) {}
+ : imm_(immediate), immediate_type_(I64) {}
// { #<immediate> }
// where <immediate> is a non zero floating point number which can be encoded
@@ -268,11 +252,9 @@ class NeonImmediate {
// This is allowed to be an implicit constructor because NeonImmediate is
// a wrapper class that doesn't normally perform any type conversion.
NeonImmediate(float immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- immediate_type_(F32) {}
+ : imm_(immediate), immediate_type_(F32) {}
NeonImmediate(double immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- immediate_type_(F64) {}
+ : imm_(immediate), immediate_type_(F64) {}
NeonImmediate(const NeonImmediate& src)
: imm_(src.imm_), immediate_type_(src.immediate_type_) {}
@@ -374,29 +356,21 @@ std::ostream& operator<<(std::ostream& os, const NeonImmediate& operand);
class NeonOperand {
public:
NeonOperand(int32_t immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- rm_(NoDReg) {}
+ : imm_(immediate), rm_(NoDReg) {}
NeonOperand(uint32_t immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- rm_(NoDReg) {}
+ : imm_(immediate), rm_(NoDReg) {}
NeonOperand(int64_t immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- rm_(NoDReg) {}
+ : imm_(immediate), rm_(NoDReg) {}
NeonOperand(uint64_t immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- rm_(NoDReg) {}
+ : imm_(immediate), rm_(NoDReg) {}
NeonOperand(float immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- rm_(NoDReg) {}
+ : imm_(immediate), rm_(NoDReg) {}
NeonOperand(double immediate) // NOLINT(runtime/explicit)
- : imm_(immediate),
- rm_(NoDReg) {}
+ : imm_(immediate), rm_(NoDReg) {}
NeonOperand(const NeonImmediate& imm) // NOLINT(runtime/explicit)
- : imm_(imm),
- rm_(NoDReg) {}
+ : imm_(imm), rm_(NoDReg) {}
NeonOperand(const VRegister& rm) // NOLINT(runtime/explicit)
- : imm_(0),
- rm_(rm) {
+ : imm_(0), rm_(rm) {
VIXL_ASSERT(rm_.IsValid());
}
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 9e73ffaa..e98de89b 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -1044,7 +1044,7 @@ void Assembler::cls(const Register& rd, const Register& rn) {
V(auti, AUTI) \
V(autd, AUTD)
-#define DEFINE_ASM_FUNCS(PRE, OP) \
+#define VIXL_DEFINE_ASM_FUNC(PRE, OP) \
void Assembler::PRE##a(const Register& xd, const Register& xn) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \
VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits()); \
@@ -1069,8 +1069,8 @@ void Assembler::cls(const Register& rd, const Register& rn) {
Emit(SF(xd) | OP##ZB | Rd(xd)); \
}
-PAUTH_VARIATIONS(DEFINE_ASM_FUNCS)
-#undef DEFINE_ASM_FUNCS
+PAUTH_VARIATIONS(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
void Assembler::pacga(const Register& xd,
const Register& xn,
@@ -1141,7 +1141,13 @@ void Assembler::LoadStorePair(const CPURegister& rt,
addrmodeop = LoadStorePairPostIndexFixed;
}
}
- Emit(addrmodeop | memop);
+
+ Instr emitop = addrmodeop | memop;
+
+ // Only X registers may be specified for ldpsw.
+ VIXL_ASSERT(((emitop & LoadStorePairMask) != LDPSW_x) || rt.IsX());
+
+ Emit(emitop);
}
@@ -1381,10 +1387,16 @@ void Assembler::ldr(const CPURegister& rt, int64_t imm19) {
}
-void Assembler::prfm(PrefetchOperation op, int64_t imm19) {
+void Assembler::prfm(int op, int64_t imm19) {
Emit(PRFM_lit | ImmPrefetchOperation(op) | ImmLLiteral(imm19));
}
+void Assembler::prfm(PrefetchOperation op, int64_t imm19) {
+ // Passing unnamed values in 'op' is undefined behaviour in C++.
+ VIXL_ASSERT(IsNamedPrefetchOperation(op));
+ prfm(static_cast<int>(op), imm19);
+}
+
// Exclusive-access instructions.
void Assembler::stxrb(const Register& rs,
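Note: the pattern above (repeated for prfum, the literal-pool prfm and Prefetch further down) keeps a raw int overload that accepts unallocated prefetch hint encodings and restricts the PrefetchOperation overload to named enumerators. A hedged standalone sketch of the same idea, with stand-in names in place of VIXL's IsNamedPrefetchOperation and emission helpers:

    #include <cassert>
    #include <cstdint>

    // Cut-down enum with two of the named hints.
    enum PrefetchOperation { PLDL1KEEP = 0, PLDL1STRM = 1 };

    // Stand-in for IsNamedPrefetchOperation(): true only for allocated hints.
    static bool IsNamedHint(int op) { return (op == PLDL1KEEP) || (op == PLDL1STRM); }

    // Raw overload: any 5-bit hint encoding is accepted, including unallocated ones.
    void prfm(int op, int64_t imm19) {
      assert((op >= 0) && (op < 32));
      (void)imm19;  // Here VIXL emits PRFM_lit | ImmPrefetchOperation(op) | ImmLLiteral(imm19).
    }

    // Typed overload: passing an unnamed value through the enum parameter is the
    // undefined behaviour the comment above refers to, so only named hints pass.
    void prfm(PrefetchOperation op, int64_t imm19) {
      assert(IsNamedHint(op));
      prfm(static_cast<int>(op), imm19);
    }

    int main() {
      prfm(PLDL1KEEP, 0);  // Named hint: typed overload.
      prfm(17, 0);         // Unallocated hint value: raw int overload.
      return 0;
    }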
@@ -1635,17 +1647,18 @@ void Assembler::ldlar(const Register& rt, const MemOperand& src) {
V(casal, CASAL)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
void Assembler::FN(const Register& rs, \
const Register& rt, \
const MemOperand& src) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \
+ VIXL_ASSERT(AreSameFormat(rs, rt)); \
LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \
Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \
}
-COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+COMPARE_AND_SWAP_W_X_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// clang-format off
#define COMPARE_AND_SWAP_W_LIST(V) \
@@ -1659,7 +1672,7 @@ COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
V(casalh, CASALH)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
void Assembler::FN(const Register& rs, \
const Register& rt, \
const MemOperand& src) { \
@@ -1667,8 +1680,8 @@ COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \
Emit(OP | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \
}
-COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+COMPARE_AND_SWAP_W_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// clang-format off
@@ -1679,7 +1692,7 @@ COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
V(caspal, CASPAL)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
void Assembler::FN(const Register& rs, \
const Register& rs1, \
const Register& rt, \
@@ -1691,11 +1704,12 @@ COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
VIXL_ASSERT(AreEven(rs, rt)); \
VIXL_ASSERT(AreConsecutive(rs, rs1)); \
VIXL_ASSERT(AreConsecutive(rt, rt1)); \
+ VIXL_ASSERT(AreSameFormat(rs, rs1, rt, rt1)); \
LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \
Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \
}
-COMPARE_AND_SWAP_PAIR_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+COMPARE_AND_SWAP_PAIR_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// These macros generate all the variations of the atomic memory operations,
// e.g. ldadd, ldadda, ldaddb, staddl, etc.
@@ -1846,7 +1860,7 @@ void Assembler::ldapursw(const Register& rt, const MemOperand& src) {
Emit(LDAPURSW | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
}
-void Assembler::prfm(PrefetchOperation op,
+void Assembler::prfm(int op,
const MemOperand& address,
LoadStoreScalingOption option) {
VIXL_ASSERT(option != RequireUnscaledOffset);
@@ -1854,8 +1868,16 @@ void Assembler::prfm(PrefetchOperation op,
Prefetch(op, address, option);
}
+void Assembler::prfm(PrefetchOperation op,
+ const MemOperand& address,
+ LoadStoreScalingOption option) {
+ // Passing unnamed values in 'op' is undefined behaviour in C++.
+ VIXL_ASSERT(IsNamedPrefetchOperation(op));
+ prfm(static_cast<int>(op), address, option);
+}
-void Assembler::prfum(PrefetchOperation op,
+
+void Assembler::prfum(int op,
const MemOperand& address,
LoadStoreScalingOption option) {
VIXL_ASSERT(option != RequireScaledOffset);
@@ -1863,11 +1885,25 @@ void Assembler::prfum(PrefetchOperation op,
Prefetch(op, address, option);
}
+void Assembler::prfum(PrefetchOperation op,
+ const MemOperand& address,
+ LoadStoreScalingOption option) {
+ // Passing unnamed values in 'op' is undefined behaviour in C++.
+ VIXL_ASSERT(IsNamedPrefetchOperation(op));
+ prfum(static_cast<int>(op), address, option);
+}
-void Assembler::prfm(PrefetchOperation op, RawLiteral* literal) {
+
+void Assembler::prfm(int op, RawLiteral* literal) {
prfm(op, static_cast<int>(LinkAndGetWordOffsetTo(literal)));
}
+void Assembler::prfm(PrefetchOperation op, RawLiteral* literal) {
+ // Passing unnamed values in 'op' is undefined behaviour in C++.
+ VIXL_ASSERT(IsNamedPrefetchOperation(op));
+ prfm(static_cast<int>(op), literal);
+}
+
void Assembler::sys(int op1, int crn, int crm, int op2, const Register& xt) {
VIXL_ASSERT(xt.Is64Bits());
@@ -1933,6 +1969,7 @@ void Assembler::LoadStoreStructVerify(const VRegister& vt,
// Assert that addressing mode is either offset (with immediate 0), post
// index by immediate of the size of the register list, or post index by a
// value in a core register.
+ VIXL_ASSERT(vt.HasSize() && vt.HasLaneSize());
if (addr.IsImmediateOffset()) {
VIXL_ASSERT(addr.GetOffset() == 0);
} else {
@@ -2290,6 +2327,7 @@ void Assembler::LoadStoreStructSingle(const VRegister& vt,
// We support vt arguments of the form vt.VxT() or vt.T(), where x is the
// number of lanes, and T is b, h, s or d.
unsigned lane_size = vt.GetLaneSizeInBytes();
+ VIXL_ASSERT(lane_size > 0);
VIXL_ASSERT(lane < (kQRegSizeInBytes / lane_size));
// Lane size is encoded in the opcode field. Lane index is encoded in the Q,
@@ -2424,7 +2462,7 @@ void Assembler::NEON3DifferentHN(const VRegister& vd,
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
void Assembler::FN(const VRegister& vd, \
const VRegister& vn, \
const VRegister& vm) { \
@@ -2432,8 +2470,8 @@ void Assembler::FN(const VRegister& vd, \
VIXL_ASSERT(AS); \
NEON3DifferentL(vd, vn, vm, OP); \
}
-NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_3DIFF_LONG_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// clang-format off
#define NEON_3DIFF_HN_LIST(V) \
@@ -2447,7 +2485,7 @@ NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
V(rsubhn2, NEON_RSUBHN2, vd.IsQ())
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
void Assembler::FN(const VRegister& vd, \
const VRegister& vn, \
const VRegister& vm) { \
@@ -2455,8 +2493,8 @@ NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
VIXL_ASSERT(AS); \
NEON3DifferentHN(vd, vn, vm, OP); \
}
-NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_3DIFF_HN_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
void Assembler::uaddw(const VRegister& vd,
const VRegister& vn,
@@ -3104,7 +3142,7 @@ void Assembler::NEONFP16ConvertToInt(const VRegister& vd,
V(fcvtau, NEON_FCVTAU, FCVTAU) \
V(fcvtas, NEON_FCVTAS, FCVTAS)
-#define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
void Assembler::FN(const Register& rd, const VRegister& vn) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \
if (vn.IsH()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); \
@@ -3119,8 +3157,8 @@ void Assembler::NEONFP16ConvertToInt(const VRegister& vd,
NEONFPConvertToInt(vd, vn, VEC_OP); \
} \
}
-NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS)
-#undef DEFINE_ASM_FUNCS
+NEON_FP2REGMISC_FCVT_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
void Assembler::fcvtzs(const Register& rd, const VRegister& vn, int fbits) {
@@ -3308,7 +3346,7 @@ void Assembler::NEON3SameFP16(const VRegister& vd,
V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar, NEON_FRECPE_H_scalar)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \
Instr op; \
@@ -3348,8 +3386,8 @@ void Assembler::NEON3SameFP16(const VRegister& vd,
NEONFP2RegMisc(vd, vn, op); \
} \
}
-NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FP2REGMISC_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// clang-format off
#define NEON_FP2REGMISC_V85_LIST(V) \
@@ -3359,7 +3397,7 @@ NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
V(frint64z, NEON_FRINT64Z, FRINT64Z)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kFrintToFixedSizedInt)); \
Instr op; \
@@ -3373,8 +3411,8 @@ NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
} \
NEONFP2RegMisc(vd, vn, op); \
}
-NEON_FP2REGMISC_V85_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FP2REGMISC_V85_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
void Assembler::NEONFP2RegMiscFP16(const VRegister& vd,
const VRegister& vn,
@@ -3638,7 +3676,7 @@ void Assembler::frecpx(const VRegister& vd, const VRegister& vn) {
V(uqrshl, NEON_UQRSHL, true)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
void Assembler::FN(const VRegister& vd, \
const VRegister& vn, \
const VRegister& vm) { \
@@ -3646,8 +3684,8 @@ void Assembler::frecpx(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(AS); \
NEON3Same(vd, vn, vm, OP); \
}
-NEON_3SAME_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_3SAME_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// clang-format off
#define NEON_FP3SAME_OP_LIST(V) \
@@ -3680,7 +3718,7 @@ NEON_3SAME_LIST(DEFINE_ASM_FUNC)
// TODO: This macro is complicated because it classifies the instructions in the
// macro list above, and treats each case differently. It could be somewhat
// simpler if we were to split the macro, at the cost of some duplication.
-#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
void Assembler::FN(const VRegister& vd, \
const VRegister& vn, \
const VRegister& vm) { \
@@ -3720,8 +3758,8 @@ NEON_3SAME_LIST(DEFINE_ASM_FUNC)
NEONFP3Same(vd, vn, vm, op); \
} \
}
-NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FP3SAME_OP_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// clang-format off
@@ -3732,7 +3770,7 @@ NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
V(fmlsl2, NEON_FMLSL2)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, VEC_OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP) \
void Assembler::FN(const VRegister& vd, \
const VRegister& vn, \
const VRegister& vm) { \
@@ -3744,8 +3782,8 @@ NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
(vd.Is4S() && vn.Is4H() && vm.Is4H())); \
Emit(FPFormat(vd) | VEC_OP | Rm(vm) | Rn(vn) | Rd(vd)); \
}
-NEON_FHM_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FHM_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
void Assembler::addp(const VRegister& vd, const VRegister& vn) {
@@ -4138,7 +4176,7 @@ void Assembler::udot(const VRegister& vd,
V(sqrdmulh, NEON_SQRDMULH_byelement, true) \
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
void Assembler::FN(const VRegister& vd, \
const VRegister& vn, \
const VRegister& vm, \
@@ -4147,8 +4185,8 @@ void Assembler::udot(const VRegister& vd,
VIXL_ASSERT(AS); \
NEONByElement(vd, vn, vm, vm_index, OP); \
}
-NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// clang-format off
@@ -4157,7 +4195,7 @@ NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
V(sqrdmlsh, NEON_SQRDMLSH_byelement)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
void Assembler::FN(const VRegister& vd, \
const VRegister& vn, \
const VRegister& vm, \
@@ -4165,8 +4203,8 @@ NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kRDM)); \
NEONByElement(vd, vn, vm, vm_index, OP); \
}
-NEON_BYELEMENT_RDM_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_RDM_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// clang-format off
@@ -4177,7 +4215,7 @@ NEON_BYELEMENT_RDM_LIST(DEFINE_ASM_FUNC)
V(fmulx, NEON_FMULX_byelement, NEON_FMULX_H_byelement)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP, OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, OP_H) \
void Assembler::FN(const VRegister& vd, \
const VRegister& vn, \
const VRegister& vm, \
@@ -4186,8 +4224,8 @@ NEON_BYELEMENT_RDM_LIST(DEFINE_ASM_FUNC)
if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
NEONFPByElement(vd, vn, vm, vm_index, OP, OP_H); \
}
-NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FPBYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// clang-format off
@@ -4213,7 +4251,7 @@ NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
void Assembler::FN(const VRegister& vd, \
const VRegister& vn, \
const VRegister& vm, \
@@ -4222,8 +4260,8 @@ NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
VIXL_ASSERT(AS); \
NEONByElementL(vd, vn, vm, vm_index, OP); \
}
-NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_LONG_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// clang-format off
@@ -4235,7 +4273,7 @@ NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
void Assembler::FN(const VRegister& vd, \
const VRegister& vn, \
const VRegister& vm, \
@@ -4252,8 +4290,8 @@ NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
Emit(FPFormat(vd) | OP | Rd(vd) | Rn(vn) | Rm(vm) | \
ImmNEONHLM(vm_index, 3)); \
}
-NEON_BYELEMENT_FHM_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_FHM_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
void Assembler::suqadd(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
@@ -4763,13 +4801,13 @@ void Assembler::NEONAcrossLanes(const VRegister& vd,
V(uminv, NEON_UMINV)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
NEONAcrossLanes(vd, vn, OP, 0); \
}
-NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_ACROSSLANES_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
// clang-format off
@@ -4780,15 +4818,15 @@ NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
V(fminnmv, NEON_FMINNMV, NEON_FMINNMV_H) \
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP, OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, OP_H) \
void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); \
if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
VIXL_ASSERT(vd.Is1S() || vd.Is1H()); \
NEONAcrossLanes(vd, vn, OP, OP_H); \
}
-NEON_ACROSSLANES_FP_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_ACROSSLANES_FP_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
void Assembler::NEONPerm(const VRegister& vd,
@@ -4870,9 +4908,9 @@ void Assembler::NEONShiftLeftImmediate(const VRegister& vd,
const VRegister& vn,
int shift,
NEONShiftImmediateOp op) {
- int laneSizeInBits = vn.GetLaneSizeInBits();
- VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits));
- NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16);
+ int lane_size_in_bits = vn.GetLaneSizeInBits();
+ VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits));
+ NEONShiftImmediate(vd, vn, op, (lane_size_in_bits + shift) << 16);
}
@@ -4880,9 +4918,9 @@ void Assembler::NEONShiftRightImmediate(const VRegister& vd,
const VRegister& vn,
int shift,
NEONShiftImmediateOp op) {
- int laneSizeInBits = vn.GetLaneSizeInBits();
- VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits));
- NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16);
+ int lane_size_in_bits = vn.GetLaneSizeInBits();
+ VIXL_ASSERT((shift >= 1) && (shift <= lane_size_in_bits));
+ NEONShiftImmediate(vd, vn, op, ((2 * lane_size_in_bits) - shift) << 16);
}
@@ -4890,9 +4928,9 @@ void Assembler::NEONShiftImmediateL(const VRegister& vd,
const VRegister& vn,
int shift,
NEONShiftImmediateOp op) {
- int laneSizeInBits = vn.GetLaneSizeInBits();
- VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits));
- int immh_immb = (laneSizeInBits + shift) << 16;
+ int lane_size_in_bits = vn.GetLaneSizeInBits();
+ VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits));
+ int immh_immb = (lane_size_in_bits + shift) << 16;
VIXL_ASSERT((vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
(vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
@@ -4908,9 +4946,9 @@ void Assembler::NEONShiftImmediateN(const VRegister& vd,
int shift,
NEONShiftImmediateOp op) {
Instr q, scalar;
- int laneSizeInBits = vd.GetLaneSizeInBits();
- VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits));
- int immh_immb = (2 * laneSizeInBits - shift) << 16;
+ int lane_size_in_bits = vd.GetLaneSizeInBits();
+ VIXL_ASSERT((shift >= 1) && (shift <= lane_size_in_bits));
+ int immh_immb = (2 * lane_size_in_bits - shift) << 16;
if (vn.IsScalar()) {
VIXL_ASSERT((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) ||
@@ -5271,6 +5309,7 @@ void Assembler::MoveWide(const Register& rd,
} else {
// Calculate a new immediate and shift combination to encode the immediate
// argument.
+ VIXL_ASSERT(shift == -1);
shift = 0;
if ((imm & 0xffffffffffff0000) == 0) {
// Nothing to do.
@@ -5604,7 +5643,7 @@ void Assembler::DataProcExtendedRegister(const Register& rd,
Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,
- unsigned access_size,
+ unsigned access_size_in_bytes_log2,
LoadStoreScalingOption option) {
Instr base = RnSP(addr.GetBaseRegister());
int64_t offset = addr.GetOffset();
@@ -5614,21 +5653,22 @@ Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,
(option == PreferUnscaledOffset) || (option == RequireUnscaledOffset);
if (prefer_unscaled && IsImmLSUnscaled(offset)) {
// Use the unscaled addressing mode.
- return base | LoadStoreUnscaledOffsetFixed |
- ImmLS(static_cast<int>(offset));
+ return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset);
}
if ((option != RequireUnscaledOffset) &&
- IsImmLSScaled(offset, access_size)) {
+ IsImmLSScaled(offset, access_size_in_bytes_log2)) {
+ // We need `offset` to be positive for the shift to be well-defined.
+ // IsImmLSScaled should check this.
+ VIXL_ASSERT(offset >= 0);
// Use the scaled addressing mode.
return base | LoadStoreUnsignedOffsetFixed |
- ImmLSUnsigned(static_cast<int>(offset) >> access_size);
+ ImmLSUnsigned(offset >> access_size_in_bytes_log2);
}
if ((option != RequireScaledOffset) && IsImmLSUnscaled(offset)) {
// Use the unscaled addressing mode.
- return base | LoadStoreUnscaledOffsetFixed |
- ImmLS(static_cast<int>(offset));
+ return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset);
}
}
@@ -5649,17 +5689,17 @@ Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,
// Shifts are encoded in one bit, indicating a left shift by the memory
// access size.
- VIXL_ASSERT((shift_amount == 0) || (shift_amount == access_size));
+ VIXL_ASSERT((shift_amount == 0) || (shift_amount == access_size_in_bytes_log2));
return base | LoadStoreRegisterOffsetFixed | Rm(addr.GetRegisterOffset()) |
ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0);
}
if (addr.IsPreIndex() && IsImmLSUnscaled(offset)) {
- return base | LoadStorePreIndexFixed | ImmLS(static_cast<int>(offset));
+ return base | LoadStorePreIndexFixed | ImmLS(offset);
}
if (addr.IsPostIndex() && IsImmLSUnscaled(offset)) {
- return base | LoadStorePostIndexFixed | ImmLS(static_cast<int>(offset));
+ return base | LoadStorePostIndexFixed | ImmLS(offset);
}
// If this point is reached, the MemOperand (addr) cannot be encoded.
@@ -5694,7 +5734,7 @@ void Assembler::LoadStorePAC(const Register& xt,
}
-void Assembler::Prefetch(PrefetchOperation op,
+void Assembler::Prefetch(int op,
const MemOperand& addr,
LoadStoreScalingOption option) {
VIXL_ASSERT(addr.IsRegisterOffset() || addr.IsImmediateOffset());
@@ -5703,6 +5743,14 @@ void Assembler::Prefetch(PrefetchOperation op,
Emit(PRFM | prfop | LoadStoreMemOperand(addr, kXRegSizeInBytesLog2, option));
}
+void Assembler::Prefetch(PrefetchOperation op,
+ const MemOperand& addr,
+ LoadStoreScalingOption option) {
+ // Passing unnamed values in 'op' is undefined behaviour in C++.
+ VIXL_ASSERT(IsNamedPrefetchOperation(op));
+ Prefetch(static_cast<int>(op), addr, option);
+}
+
bool Assembler::IsImmAddSub(int64_t immediate) {
return IsUint12(immediate) ||
@@ -5788,17 +5836,17 @@ bool Assembler::IsImmFP64(double imm) {
}
-bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size) {
- VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
- return IsMultiple(offset, 1 << access_size) &&
- IsInt7(offset / (1 << access_size));
+bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2) {
+ VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2);
+ return IsMultiple(offset, 1 << access_size_in_bytes_log2) &&
+ IsInt7(offset / (1 << access_size_in_bytes_log2));
}
-bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size) {
- VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
- return IsMultiple(offset, 1 << access_size) &&
- IsUint12(offset / (1 << access_size));
+bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2) {
+ VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2);
+ return IsMultiple(offset, 1 << access_size_in_bytes_log2) &&
+ IsUint12(offset / (1 << access_size_in_bytes_log2));
}
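Note: the two addressing-mode predicates that the renamed access_size_in_bytes_log2 parameter feeds can be sketched standalone as below; the helper names are stand-ins for VIXL's IsMultiple, IsUint12 and IsInt9, and the logic mirrors the function above plus the 9-bit signed LDUR/STUR offset form:

    #include <cstdint>

    static bool IsMultipleOf(int64_t value, int64_t n) { return (value % n) == 0; }
    static bool FitsUint12(int64_t value) { return (value >= 0) && (value < 4096); }
    static bool FitsInt9(int64_t value) { return (value >= -256) && (value <= 255); }

    // Scaled ("unsigned offset") form: the byte offset must be a non-negative
    // multiple of the access size and fit in 12 bits once divided by it.
    bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2) {
      int64_t access_size = INT64_C(1) << access_size_in_bytes_log2;
      return IsMultipleOf(offset, access_size) && FitsUint12(offset / access_size);
    }

    // Unscaled (LDUR/STUR) form: any signed 9-bit byte offset.
    bool IsImmLSUnscaled(int64_t offset) { return FitsInt9(offset); }

For an X-register access (access_size_in_bytes_log2 == 3), an offset of 4088 encodes in the scaled form (511 * 8), while an offset of 4 only fits the unscaled form, which is why LoadStoreMemOperand above tries both encodings.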
@@ -5832,7 +5880,8 @@ bool Assembler::IsImmLogical(uint64_t value,
unsigned* n,
unsigned* imm_s,
unsigned* imm_r) {
- VIXL_ASSERT((width == kWRegSize) || (width == kXRegSize));
+ VIXL_ASSERT((width == kBRegSize) || (width == kHRegSize) ||
+ (width == kSRegSize) || (width == kDRegSize));
bool negate = false;
@@ -5873,16 +5922,18 @@ bool Assembler::IsImmLogical(uint64_t value,
value = ~value;
}
- if (width == kWRegSize) {
- // To handle 32-bit logical immediates, the very easiest thing is to repeat
- // the input value twice to make a 64-bit word. The correct encoding of that
- // as a logical immediate will also be the correct encoding of the 32-bit
- // value.
+ if (width <= kWRegSize) {
+ // To handle 8/16/32-bit logical immediates, the very easiest thing is to repeat
+ // the input value to fill a 64-bit word. The correct encoding of that as a
+ // logical immediate will also be the correct encoding of the value.
- // Avoid making the assumption that the most-significant 32 bits are zero by
+ // Avoid making the assumption that the most-significant 56/48/32 bits are zero by
// shifting the value left and duplicating it.
- value <<= kWRegSize;
- value |= value >> kWRegSize;
+ for (unsigned bits = width; bits <= kWRegSize; bits *= 2) {
+ value <<= bits;
+ uint64_t mask = (UINT64_C(1) << bits) - 1;
+ value |= ((value >> bits) & mask);
+ }
}
// The basic analysis idea: imagine our input word looks like this.
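Note: the replication loop added above generalises the old 32-bit trick to 8- and 16-bit widths (used by the per-lane logical immediates of the SVE instructions added in this change): whatever the width, copying the value across the full 64-bit word lets the existing 64-bit encoder run unchanged. A small, hedged illustration of just that step, outside VIXL's API:

    #include <cstdint>
    #include <cstdio>

    // Replicate a `width`-bit value (width = 8, 16 or 32) across a 64-bit word,
    // as the loop above does, discarding any stale high bits along the way.
    uint64_t ReplicateToXReg(uint64_t value, unsigned width) {
      for (unsigned bits = width; bits <= 32; bits *= 2) {
        uint64_t mask = (UINT64_C(1) << bits) - 1;
        value <<= bits;
        value |= (value >> bits) & mask;
      }
      return value;
    }

    int main() {
      // 0xc3 repeated eight times; prints c3c3c3c3c3c3c3c3.
      std::printf("%016llx\n",
                  (unsigned long long)ReplicateToXReg(0xc3, 8));
      return 0;
    }

Encoding the replicated 64-bit pattern as a logical immediate then yields the correct fields for the original narrow value, which is exactly the claim made in the comment above.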
@@ -6186,152 +6237,5 @@ bool Assembler::CPUHas(SystemRegister sysreg) const {
}
-bool AreAliased(const CPURegister& reg1,
- const CPURegister& reg2,
- const CPURegister& reg3,
- const CPURegister& reg4,
- const CPURegister& reg5,
- const CPURegister& reg6,
- const CPURegister& reg7,
- const CPURegister& reg8) {
- int number_of_valid_regs = 0;
- int number_of_valid_fpregs = 0;
-
- RegList unique_regs = 0;
- RegList unique_fpregs = 0;
-
- const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};
-
- for (size_t i = 0; i < ArrayLength(regs); i++) {
- if (regs[i].IsRegister()) {
- number_of_valid_regs++;
- unique_regs |= regs[i].GetBit();
- } else if (regs[i].IsVRegister()) {
- number_of_valid_fpregs++;
- unique_fpregs |= regs[i].GetBit();
- } else {
- VIXL_ASSERT(!regs[i].IsValid());
- }
- }
-
- int number_of_unique_regs = CountSetBits(unique_regs);
- int number_of_unique_fpregs = CountSetBits(unique_fpregs);
-
- VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
- VIXL_ASSERT(number_of_valid_fpregs >= number_of_unique_fpregs);
-
- return (number_of_valid_regs != number_of_unique_regs) ||
- (number_of_valid_fpregs != number_of_unique_fpregs);
-}
-
-
-bool AreSameSizeAndType(const CPURegister& reg1,
- const CPURegister& reg2,
- const CPURegister& reg3,
- const CPURegister& reg4,
- const CPURegister& reg5,
- const CPURegister& reg6,
- const CPURegister& reg7,
- const CPURegister& reg8) {
- VIXL_ASSERT(reg1.IsValid());
- bool match = true;
- match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1);
- match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1);
- match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1);
- match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1);
- match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1);
- match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1);
- match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1);
- return match;
-}
-
-bool AreEven(const CPURegister& reg1,
- const CPURegister& reg2,
- const CPURegister& reg3,
- const CPURegister& reg4,
- const CPURegister& reg5,
- const CPURegister& reg6,
- const CPURegister& reg7,
- const CPURegister& reg8) {
- VIXL_ASSERT(reg1.IsValid());
- bool even = (reg1.GetCode() % 2) == 0;
- even &= !reg2.IsValid() || ((reg2.GetCode() % 2) == 0);
- even &= !reg3.IsValid() || ((reg3.GetCode() % 2) == 0);
- even &= !reg4.IsValid() || ((reg4.GetCode() % 2) == 0);
- even &= !reg5.IsValid() || ((reg5.GetCode() % 2) == 0);
- even &= !reg6.IsValid() || ((reg6.GetCode() % 2) == 0);
- even &= !reg7.IsValid() || ((reg7.GetCode() % 2) == 0);
- even &= !reg8.IsValid() || ((reg8.GetCode() % 2) == 0);
- return even;
-}
-
-
-bool AreConsecutive(const CPURegister& reg1,
- const CPURegister& reg2,
- const CPURegister& reg3,
- const CPURegister& reg4) {
- VIXL_ASSERT(reg1.IsValid());
-
- if (!reg2.IsValid()) {
- return true;
- } else if (reg2.GetCode() != ((reg1.GetCode() + 1) % kNumberOfRegisters)) {
- return false;
- }
-
- if (!reg3.IsValid()) {
- return true;
- } else if (reg3.GetCode() != ((reg2.GetCode() + 1) % kNumberOfRegisters)) {
- return false;
- }
-
- if (!reg4.IsValid()) {
- return true;
- } else if (reg4.GetCode() != ((reg3.GetCode() + 1) % kNumberOfRegisters)) {
- return false;
- }
-
- return true;
-}
-
-
-bool AreSameFormat(const VRegister& reg1,
- const VRegister& reg2,
- const VRegister& reg3,
- const VRegister& reg4) {
- VIXL_ASSERT(reg1.IsValid());
- bool match = true;
- match &= !reg2.IsValid() || reg2.IsSameFormat(reg1);
- match &= !reg3.IsValid() || reg3.IsSameFormat(reg1);
- match &= !reg4.IsValid() || reg4.IsSameFormat(reg1);
- return match;
-}
-
-
-bool AreConsecutive(const VRegister& reg1,
- const VRegister& reg2,
- const VRegister& reg3,
- const VRegister& reg4) {
- VIXL_ASSERT(reg1.IsValid());
-
- if (!reg2.IsValid()) {
- return true;
- } else if (reg2.GetCode() != ((reg1.GetCode() + 1) % kNumberOfVRegisters)) {
- return false;
- }
-
- if (!reg3.IsValid()) {
- return true;
- } else if (reg3.GetCode() != ((reg2.GetCode() + 1) % kNumberOfVRegisters)) {
- return false;
- }
-
- if (!reg4.IsValid()) {
- return true;
- } else if (reg4.GetCode() != ((reg3.GetCode() + 1) % kNumberOfVRegisters)) {
- return false;
- }
-
- return true;
-}
} // namespace aarch64
} // namespace vixl
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index 3ccda1a6..f7aafd07 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -1089,18 +1089,6 @@ class Assembler : public vixl::internal::AssemblerBase {
// zero [Armv8.3].
void pacdza(const Register& xd);
- // Pointer Authentication Code for Data address, using key A, with address in
- // x17 and modifier in x16 [Armv8.3].
- void pacda1716();
-
- // Pointer Authentication Code for Data address, using key A, with address in
- // LR and modifier in SP [Armv8.3].
- void pacdasp();
-
- // Pointer Authentication Code for Data address, using key A, with address in
- // LR and a modifier of zero [Armv8.3].
- void pacdaz();
-
// Pointer Authentication Code for Data address, using key B [Armv8.3].
void pacdb(const Register& xd, const Register& xn);
@@ -1108,18 +1096,6 @@ class Assembler : public vixl::internal::AssemblerBase {
// zero [Armv8.3].
void pacdzb(const Register& xd);
- // Pointer Authentication Code for Data address, using key B, with address in
- // x17 and modifier in x16 [Armv8.3].
- void pacdb1716();
-
- // Pointer Authentication Code for Data address, using key B, with address in
- // LR and modifier in SP [Armv8.3].
- void pacdbsp();
-
- // Pointer Authentication Code for Data address, using key B, with address in
- // LR and a modifier of zero [Armv8.3].
- void pacdbz();
-
// Pointer Authentication Code, using Generic key [Armv8.3].
void pacga(const Register& xd, const Register& xn, const Register& xm);
@@ -1167,36 +1143,12 @@ class Assembler : public vixl::internal::AssemblerBase {
// Authenticate Data address, using key A and a modifier of zero [Armv8.3].
void autdza(const Register& xd);
- // Authenticate Data address, using key A, with address in x17 and modifier in
- // x16 [Armv8.3].
- void autda1716();
-
- // Authenticate Data address, using key A, with address in LR and modifier in
- // SP [Armv8.3].
- void autdasp();
-
- // Authenticate Data address, using key A, with address in LR and a modifier
- // of zero [Armv8.3].
- void autdaz();
-
// Authenticate Data address, using key B [Armv8.3].
void autdb(const Register& xd, const Register& xn);
// Authenticate Data address, using key B and a modifier of zero [Armv8.3].
void autdzb(const Register& xd);
- // Authenticate Data address, using key B, with address in x17 and modifier in
- // x16 [Armv8.3].
- void autdb1716();
-
- // Authenticate Data address, using key B, with address in LR and modifier in
- // SP [Armv8.3].
- void autdbsp();
-
- // Authenticate Data address, using key B, with address in LR and a modifier
- // of zero [Armv8.3].
- void autdbz();
-
// Strip Pointer Authentication Code of Data address [Armv8.3].
void xpacd(const Register& xd);
@@ -2112,6 +2064,22 @@ class Assembler : public vixl::internal::AssemblerBase {
// Prefetch from pc + imm19 << 2.
void prfm(PrefetchOperation op, int64_t imm19);
+ // Prefetch memory (allowing unallocated hints).
+ void prfm(int op,
+ const MemOperand& addr,
+ LoadStoreScalingOption option = PreferScaledOffset);
+
+ // Prefetch memory (with unscaled offset, allowing unallocated hints).
+ void prfum(int op,
+ const MemOperand& addr,
+ LoadStoreScalingOption option = PreferUnscaledOffset);
+
+ // Prefetch memory in the literal pool (allowing unallocated hints).
+ void prfm(int op, RawLiteral* literal);
+
+ // Prefetch from pc + imm19 << 2 (allowing unallocated hints).
+ void prfm(int op, int64_t imm19);
+
// Move instructions. The default shift of -1 indicates that the move
// instruction will calculate an appropriate 16-bit immediate and left shift
// that is equal to the 64-bit immediate argument. If an explicit left shift
@@ -3618,6 +3586,2240 @@ class Assembler : public vixl::internal::AssemblerBase {
const VRegister& vm,
int rot);
+ // Scalable Vector Extensions.
+
+ // Absolute value (predicated).
+ void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Add vectors (predicated).
+ void add(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Add vectors (unpredicated).
+ void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Add immediate (unpredicated).
+ void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
+
+ // Add multiple of predicate register size to scalar register.
+ void addpl(const Register& xd, const Register& xn, int imm6);
+
+ // Add multiple of vector register size to scalar register.
+ void addvl(const Register& xd, const Register& xn, int imm6);
+
+ // Compute vector address.
+ void adr(const ZRegister& zd, const SVEMemOperand& addr);
+
+ // Bitwise AND predicates.
+ void and_(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise AND vectors (predicated).
+ void and_(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Bitwise AND with immediate (unpredicated).
+ void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
+
+ // Bitwise AND vectors (unpredicated).
+ void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Bitwise AND predicates.
+ void ands(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise AND reduction to scalar.
+ void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Arithmetic shift right by immediate (predicated).
+ void asr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift);
+
+ // Arithmetic shift right by 64-bit wide elements (predicated).
+ void asr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Arithmetic shift right by immediate (unpredicated).
+ void asr(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Arithmetic shift right by 64-bit wide elements (unpredicated).
+ void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Arithmetic shift right for divide by immediate (predicated).
+ void asrd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift);
+
+ // Reversed arithmetic shift right by vector (predicated).
+ void asrr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Bitwise clear predicates.
+ void bic(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise clear vectors (predicated).
+ void bic(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Bitwise clear bits using immediate (unpredicated).
+ void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
+
+ // Bitwise clear vectors (unpredicated).
+ void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Bitwise clear predicates.
+ void bics(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Break after first true condition.
+ void brka(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Break after first true condition.
+ void brkas(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Break before first true condition.
+ void brkb(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Break before first true condition.
+ void brkbs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Propagate break to next partition.
+ void brkn(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Propagate break to next partition.
+ void brkns(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Break after first true condition, propagating from previous partition.
+ void brkpa(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Break after first true condition, propagating from previous partition.
+ void brkpas(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Break before first true condition, propagating from previous partition.
+ void brkpb(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Break before first true condition, propagating from previous partition.
+ void brkpbs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Conditionally extract element after last to general-purpose register.
+ void clasta(const Register& rd,
+ const PRegister& pg,
+ const Register& rn,
+ const ZRegister& zm);
+
+ // Conditionally extract element after last to SIMD&FP scalar register.
+ void clasta(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm);
+
+ // Conditionally extract element after last to vector register.
+ void clasta(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Conditionally extract last element to general-purpose register.
+ void clastb(const Register& rd,
+ const PRegister& pg,
+ const Register& rn,
+ const ZRegister& zm);
+
+ // Conditionally extract last element to SIMD&FP scalar register.
+ void clastb(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm);
+
+ // Conditionally extract last element to vector register.
+ void clastb(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Count leading sign bits (predicated).
+ void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Count leading zero bits (predicated).
+ void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ void cmp(Condition cond,
+ const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Compare vector to 64-bit wide elements.
+ void cmpeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Compare vector to immediate.
+ void cmpeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Compare vector to 64-bit wide elements.
+ void cmpge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Compare vector to immediate.
+ void cmpge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Compare vector to 64-bit wide elements.
+ void cmpgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Compare vector to immediate.
+ void cmpgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Compare vector to 64-bit wide elements.
+ void cmphi(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Compare vector to immediate.
+ void cmphi(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7);
+
+ // Compare vector to 64-bit wide elements.
+ void cmphs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Compare vector to immediate.
+ void cmphs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7);
+
+ // Compare vector to 64-bit wide elements.
+ void cmple(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Compare vector to immediate.
+ void cmple(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Compare vector to 64-bit wide elements.
+ void cmplo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Compare vector to immediate.
+ void cmplo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7);
+
+ // Compare vector to 64-bit wide elements.
+ void cmpls(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Compare vector to immediate.
+ void cmpls(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7);
+
+ // Compare vector to 64-bit wide elements.
+ void cmplt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Compare vector to immediate.
+ void cmplt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Compare vector to 64-bit wide elements.
+ void cmpne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Compare vector to immediate.
+ void cmpne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Logically invert boolean condition in vector (predicated).
+ void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Count non-zero bits (predicated).
+ void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Set scalar to multiple of predicate constraint element count.
+ void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Set scalar to multiple of predicate constraint element count.
+ void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Set scalar to multiple of predicate constraint element count.
+ void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Set scalar to active predicate element count.
+ void cntp(const Register& xd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Set scalar to multiple of predicate constraint element count.
+ void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Shuffle active elements of vector to the right and fill with zero.
+ void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
+
+ // Copy signed integer immediate to vector elements (predicated).
+ void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
+
+ // Copy general-purpose register to vector elements (predicated).
+ void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
+
+ // Copy SIMD&FP scalar register to vector elements (predicated).
+ void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
+
+ // Compare and terminate loop.
+ void ctermeq(const Register& rn, const Register& rm);
+
+ // Compare and terminate loop.
+ void ctermne(const Register& rn, const Register& rm);
+
+ // Decrement scalar by multiple of predicate constraint element count.
+ void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Decrement scalar by multiple of predicate constraint element count.
+ void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Decrement vector by multiple of predicate constraint element count.
+ void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Decrement scalar by multiple of predicate constraint element count.
+ void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Decrement vector by multiple of predicate constraint element count.
+ void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Decrement scalar by active predicate element count.
+ void decp(const Register& rdn, const PRegisterWithLaneSize& pg);
+
+ // Decrement vector by active predicate element count.
+ void decp(const ZRegister& zdn, const PRegister& pg);
+
+ // Decrement scalar by multiple of predicate constraint element count.
+ void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Decrement vector by multiple of predicate constraint element count.
+ void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Broadcast general-purpose register to vector elements (unpredicated).
+ void dup(const ZRegister& zd, const Register& xn);
+
+ // Broadcast indexed element to vector (unpredicated).
+ void dup(const ZRegister& zd, const ZRegister& zn, unsigned index);
+
+ // As for movz/movk/movn, if the default shift of -1 is specified to dup, the
+ // assembler will pick an appropriate immediate and left shift that is
+ // equivalent to the immediate argument. If an explicit left shift is
+ // specified (0 or 8), the immediate must be a signed 8-bit integer.
+
+ // Broadcast signed immediate to vector elements (unpredicated).
+ void dup(const ZRegister& zd, int imm8, int shift = -1);
+
+ // Broadcast logical bitmask immediate to vector (unpredicated).
+ void dupm(const ZRegister& zd, uint64_t imm);
+
+ // Bitwise exclusive OR with inverted immediate (unpredicated).
+ void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
+
+ // Bitwise exclusive OR predicates.
+ void eor(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise exclusive OR vectors (predicated).
+ void eor(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Bitwise exclusive OR with immediate (unpredicated).
+ void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
+
+ // Bitwise exclusive OR vectors (unpredicated).
+ void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Bitwise exclusive OR predicates.
+ void eors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise XOR reduction to scalar.
+ void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Extract vector from pair of vectors.
+ void ext(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ unsigned offset);
+
+ // Floating-point absolute difference (predicated).
+ void fabd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point absolute value (predicated).
+ void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point absolute compare vectors.
+ void facge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point absolute compare vectors.
+ void facgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point add immediate (predicated).
+ void fadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm);
+
+ // Floating-point add vector (predicated).
+ void fadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point add vector (unpredicated).
+ void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Floating-point add strictly-ordered reduction, accumulating in scalar.
+ void fadda(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm);
+
+ // Floating-point add recursive reduction to scalar.
+ void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Floating-point complex add with rotate (predicated).
+ void fcadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+
+ // Floating-point compare vector with zero.
+ void fcmeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero);
+
+ // Floating-point compare vectors.
+ void fcmeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point compare vector with zero.
+ void fcmge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero);
+
+ // Floating-point compare vectors.
+ void fcmge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point compare vector with zero.
+ void fcmgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero);
+
+ // Floating-point compare vectors.
+ void fcmgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point complex multiply-add with rotate (predicated).
+ void fcmla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+
+ // Floating-point complex multiply-add by indexed values with rotate.
+ void fcmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot);
+
+ // Floating-point compare vector with zero.
+ void fcmle(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero);
+
+ // Floating-point compare vector with zero.
+ void fcmlt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero);
+
+ // Floating-point compare vector with zero.
+ void fcmne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero);
+
+ // Floating-point compare vectors.
+ void fcmne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point compare vectors.
+ void fcmuo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Copy floating-point immediate to vector elements (predicated).
+ void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
+
+ // Copy half-precision floating-point immediate to vector elements
+ // (predicated).
+ void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
+ fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN));
+ }
+
+ // Floating-point convert precision (predicated).
+ void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point convert to signed integer, rounding toward zero
+ // (predicated).
+ void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point convert to unsigned integer, rounding toward zero
+ // (predicated).
+ void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point divide by vector (predicated).
+ void fdiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point reversed divide by vector (predicated).
+ void fdivr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Broadcast floating-point immediate to vector elements.
+ void fdup(const ZRegister& zd, double imm);
+
+ // Broadcast half-precision floating-point immediate to vector elements.
+ void fdup(const ZRegister& zd, Float16 imm) {
+ fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
+ }
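  // Hedged sketch of the two fdup() overloads above; a Float16 constructed
  // from a double literal is assumed to be available, as elsewhere in VIXL:
  //
  //   masm.fdup(z0.VnD(), 1.5);           // Double immediate, broadcast to D lanes.
  //   masm.fdup(z1.VnH(), Float16(1.5));  // Forwarded through FPToDouble() above.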
+
+ // Floating-point exponential accelerator.
+ void fexpa(const ZRegister& zd, const ZRegister& zn);
+
+ // Floating-point fused multiply-add vectors (predicated), writing
+ // multiplicand [Zdn = Za + Zdn * Zm].
+ void fmad(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za);
+
+ // Floating-point maximum with immediate (predicated).
+ void fmax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm);
+
+ // Floating-point maximum (predicated).
+ void fmax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point maximum number with immediate (predicated).
+ void fmaxnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm);
+
+ // Floating-point maximum number (predicated).
+ void fmaxnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point maximum number recursive reduction to scalar.
+ void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Floating-point maximum recursive reduction to scalar.
+ void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Floating-point minimum with immediate (predicated).
+ void fmin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm);
+
+ // Floating-point minimum (predicated).
+ void fmin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point minimum number with immediate (predicated).
+ void fminnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm);
+
+ // Floating-point minimum number (predicated).
+ void fminnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point minimum number recursive reduction to scalar.
+ void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Floating-point minimum recursive reduction to scalar.
+ void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Floating-point fused multiply-add vectors (predicated), writing addend
+ // [Zda = Zda + Zn * Zm].
+ void fmla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
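  // The bracketed forms above and in fmad() differ only in which source is
  // overwritten; a sketch with assumed register helpers:
  //
  //   masm.fmla(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS());  // z0 = z0 + z1 * z2
  //   masm.fmad(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS());  // z0 = z2 + z0 * z1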
+
+ // Floating-point fused multiply-add by indexed elements
+ // (Zda = Zda + Zn * Zm[indexed]).
+ void fmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Floating-point fused multiply-subtract vectors (predicated), writing
+ // addend [Zda = Zda + -Zn * Zm].
+ void fmls(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point fused multiply-subtract by indexed elements
+ // (Zda = Zda + -Zn * Zm[indexed]).
+ void fmls(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Move 8-bit floating-point immediate to vector elements (unpredicated).
+ void fmov(const ZRegister& zd, double imm);
+
+ // Move 8-bit floating-point immediate to vector elements (predicated).
+ void fmov(const ZRegister& zd, const PRegisterM& pg, double imm);
+
+ // Floating-point fused multiply-subtract vectors (predicated), writing
+ // multiplicand [Zdn = Za + -Zdn * Zm].
+ void fmsb(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za);
+
+ // Floating-point multiply by immediate (predicated).
+ void fmul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm);
+
+ // Floating-point multiply vectors (predicated).
+ void fmul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point multiply by indexed elements.
+ void fmul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ unsigned index);
+
+ // Floating-point multiply vectors (unpredicated).
+ void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Floating-point multiply-extended vectors (predicated).
+ void fmulx(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point negate (predicated).
+ void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point negated fused multiply-add vectors (predicated), writing
+ // multiplicand [Zdn = -Za + -Zdn * Zm].
+ void fnmad(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za);
+
+ // Floating-point negated fused multiply-add vectors (predicated), writing
+ // addend [Zda = -Zda + -Zn * Zm].
+ void fnmla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point negated fused multiply-subtract vectors (predicated),
+ // writing addend [Zda = -Zda + Zn * Zm].
+ void fnmls(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point negated fused multiply-subtract vectors (predicated),
+ // writing multiplicand [Zdn = -Za + Zdn * Zm].
+ void fnmsb(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za);
+
+ // Floating-point reciprocal estimate (unpredicated).
+ void frecpe(const ZRegister& zd, const ZRegister& zn);
+
+ // Floating-point reciprocal step (unpredicated).
+ void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Floating-point reciprocal exponent (predicated).
+ void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point round to integral value (predicated).
+ void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point round to integral value (predicated).
+ void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point round to integral value (predicated).
+ void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point round to integral value (predicated).
+ void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point round to integral value (predicated).
+ void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point round to integral value (predicated).
+ void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point round to integral value (predicated).
+ void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point reciprocal square root estimate (unpredicated).
+ void frsqrte(const ZRegister& zd, const ZRegister& zn);
+
+ // Floating-point reciprocal square root step (unpredicated).
+ void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Floating-point adjust exponent by vector (predicated).
+ void fscale(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point square root (predicated).
+ void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point subtract immediate (predicated).
+ void fsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm);
+
+ // Floating-point subtract vectors (predicated).
+ void fsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point subtract vectors (unpredicated).
+ void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Floating-point reversed subtract from immediate (predicated).
+ void fsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm);
+
+ // Floating-point reversed subtract vectors (predicated).
+ void fsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point trigonometric multiply-add coefficient.
+ void ftmad(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int imm3);
+
+ // Floating-point trigonometric starting value.
+ void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Floating-point trigonometric select coefficient.
+ void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Increment scalar by multiple of predicate constraint element count.
+ void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Increment scalar by multiple of predicate constraint element count.
+ void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Increment vector by multiple of predicate constraint element count.
+ void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Increment scalar by multiple of predicate constraint element count.
+ void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Increment vector by multiple of predicate constraint element count.
+ void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Increment scalar by active predicate element count.
+ void incp(const Register& rdn, const PRegisterWithLaneSize& pg);
+
+ // Increment vector by active predicate element count.
+ void incp(const ZRegister& zdn, const PRegister& pg);
+
+ // Increment scalar by multiple of predicate constraint element count.
+ void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Increment vector by multiple of predicate constraint element count.
+ void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
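  // Sketch of the pattern/multiplier operands shared by the inc*/dec* and
  // saturating families (SVE_ALL is the default; SVE_POW2 is assumed to be one
  // of the usual named constraints):
  //
  //   masm.incd(x0);              // x0 += number of D lanes in a vector.
  //   masm.incw(x1, SVE_ALL, 4);  // x1 += 4 * number of S lanes.
  //   masm.incb(x2, SVE_POW2);    // x2 += largest power-of-two count of B lanes.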
+
+ // Create index starting from and incremented by immediate.
+ void index(const ZRegister& zd, int start, int step);
+
+ // Create index starting from and incremented by general-purpose register.
+ void index(const ZRegister& zd, const Register& rn, const Register& rm);
+
+ // Create index starting from general-purpose register and incremented by
+ // immediate.
+ void index(const ZRegister& zd, const Register& rn, int imm5);
+
+ // Create index starting from immediate and incremented by general-purpose
+ // register.
+ void index(const ZRegister& zd, int imm5, const Register& rm);
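  // The index() forms above build per-lane indices; for example (register
  // helpers assumed):
  //
  //   masm.index(z0.VnS(), 0, 1);   // z0.s = { 0, 1, 2, 3, ... }
  //   masm.index(z1.VnD(), x0, 8);  // z1.d = { x0, x0 + 8, x0 + 16, ... }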
+
+ // Insert general-purpose register in shifted vector.
+ void insr(const ZRegister& zdn, const Register& rm);
+
+ // Insert SIMD&FP scalar register in shifted vector.
+ void insr(const ZRegister& zdn, const VRegister& vm);
+
+ // Extract element after last to general-purpose register.
+ void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn);
+
+ // Extract element after last to SIMD&FP scalar register.
+ void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Extract last element to general-purpose register.
+ void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn);
+
+ // Extract last element to SIMD&FP scalar register.
+ void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Contiguous/gather load bytes to vector.
+ void ld1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous/gather load halfwords to vector.
+ void ld1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous/gather load words to vector.
+ void ld1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous/gather load doublewords to vector.
+ void ld1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
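  // Minimal contiguous-load sketch; the SVEMemOperand constructors used here
  // (scalar-plus-immediate, vector-length-scaled immediate and
  // scalar-plus-scalar) are assumed rather than shown in this hunk:
  //
  //   masm.ld1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0));                // [x0]
  //   masm.ld1d(z1.VnD(), p0.Zeroing(), SVEMemOperand(x0, 1, SVE_MUL_VL)); // [x0 + 1 * VL]
  //   masm.ld1b(z2.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1));            // [x0 + x1]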
+
+ // TODO: Merge other loads into the SVEMemOperand versions.
+
+ // Load and broadcast unsigned byte to vector.
+ void ld1rb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Load and broadcast unsigned halfword to vector.
+ void ld1rh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Load and broadcast unsigned word to vector.
+ void ld1rw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Load and broadcast doubleword to vector.
+ void ld1rd(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load and replicate sixteen bytes.
+ void ld1rqb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load and replicate eight halfwords.
+ void ld1rqh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load and replicate four words.
+ void ld1rqw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load and replicate two doublewords.
+ void ld1rqd(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Load and broadcast signed byte to vector.
+ void ld1rsb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Load and broadcast signed halfword to vector.
+ void ld1rsh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Load and broadcast signed word to vector.
+ void ld1rsw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous/gather load signed bytes to vector.
+ void ld1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous/gather load signed halfwords to vector.
+ void ld1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous/gather load signed words to vector.
+ void ld1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // TODO: Merge other loads into the SVEMemOperand versions.
+
+ // Contiguous load two-byte structures to two vectors.
+ void ld2b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load two-halfword structures to two vectors.
+ void ld2h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load two-word structures to two vectors.
+ void ld2w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load two-doubleword structures to two vectors.
+ void ld2d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load three-byte structures to three vectors.
+ void ld3b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load three-halfword structures to three vectors.
+ void ld3h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load three-word structures to three vectors.
+ void ld3w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load three-doubleword structures to three vectors.
+ void ld3d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load four-byte structures to four vectors.
+ void ld4b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load four-halfword structures to four vectors.
+ void ld4h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load four-word structures to four vectors.
+ void ld4w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load four-doubleword structures to four vectors.
+ void ld4d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
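  // Structure loads de-interleave into consecutively numbered vectors; for
  // example (register helpers assumed), interleaved { x, y } word pairs:
  //
  //   masm.ld2w(z0.VnS(), z1.VnS(), p0.Zeroing(), SVEMemOperand(x0));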
+
+ // Contiguous load first-fault unsigned bytes to vector.
+ void ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load first-fault unsigned halfwords to vector.
+ void ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load first-fault unsigned words to vector.
+ void ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load first-fault doublewords to vector.
+ void ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load first-fault signed bytes to vector.
+ void ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load first-fault signed halfwords to vector.
+ void ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load first-fault signed words to vector.
+ void ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Gather load first-fault unsigned bytes to vector.
+ void ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm);
+
+ // Gather load first-fault unsigned bytes to vector (immediate index).
+ void ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Gather load first-fault doublewords to vector (vector index).
+ void ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm);
+
+ // Gather load first-fault doublewords to vector (immediate index).
+ void ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Gather load first-fault unsigned halfwords to vector (vector index).
+ void ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm);
+
+ // Gather load first-fault unsigned halfwords to vector (immediate index).
+ void ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Gather load first-fault signed bytes to vector (vector index).
+ void ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm);
+
+ // Gather load first-fault signed bytes to vector (immediate index).
+ void ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Gather load first-fault signed halfwords to vector (vector index).
+ void ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm);
+
+ // Gather load first-fault signed halfwords to vector (immediate index).
+ void ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Gather load first-fault signed words to vector (vector index).
+ void ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm);
+
+ // Gather load first-fault signed words to vector (immediate index).
+ void ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Gather load first-fault unsigned words to vector (vector index).
+ void ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm);
+
+ // Gather load first-fault unsigned words to vector (immediate index).
+ void ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5);
+
+ // Contiguous load non-fault unsigned bytes to vector (immediate index).
+ void ldnf1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load non-fault doublewords to vector (immediate index).
+ void ldnf1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load non-fault unsigned halfwords to vector (immediate
+ // index).
+ void ldnf1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load non-fault signed bytes to vector (immediate index).
+ void ldnf1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load non-fault signed halfwords to vector (immediate index).
+ void ldnf1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load non-fault signed words to vector (immediate index).
+ void ldnf1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load non-fault unsigned words to vector (immediate index).
+ void ldnf1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load non-temporal bytes to vector.
+ void ldnt1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load non-temporal halfwords to vector.
+ void ldnt1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load non-temporal words to vector.
+ void ldnt1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load non-temporal doublewords to vector.
+ void ldnt1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Load SVE predicate/vector register.
+ void ldr(const CPURegister& rt, const SVEMemOperand& addr);
+
+ // Logical shift left by immediate (predicated).
+ void lsl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift);
+
+ // Logical shift left by 64-bit wide elements (predicated).
+ void lsl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Logical shift left by immediate (unpredicated).
+ void lsl(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Logical shift left by 64-bit wide elements (unpredicated).
+ void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Reversed logical shift left by vector (predicated).
+ void lslr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Logical shift right by immediate (predicated).
+ void lsr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift);
+
+ // Logical shift right by 64-bit wide elements (predicated).
+ void lsr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Logical shift right by immediate (unpredicated).
+ void lsr(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Logical shift right by 64-bit wide elements (unpredicated).
+ void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Reversed logical shift right by vector (predicated).
+ void lsrr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Bitwise invert predicate.
+ void not_(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Bitwise invert predicate, setting the condition flags.
+ void nots(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Multiply-add vectors (predicated), writing multiplicand
+ // [Zdn = Za + Zdn * Zm].
+ void mad(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za);
+
+ // Multiply-add vectors (predicated), writing addend
+ // [Zda = Zda + Zn * Zm].
+ void mla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Multiply-subtract vectors (predicated), writing addend
+ // [Zda = Zda - Zn * Zm].
+ void mls(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Move predicates (unpredicated)
+ void mov(const PRegister& pd, const PRegister& pn);
+
+ // Move predicates (merging)
+ void mov(const PRegisterWithLaneSize& pd,
+ const PRegisterM& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Move predicates (zeroing)
+ void mov(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Move general-purpose register to vector elements (unpredicated)
+ void mov(const ZRegister& zd, const Register& xn);
+
+ // Move SIMD&FP scalar register to vector elements (unpredicated)
+ void mov(const ZRegister& zd, const VRegister& vn);
+
+ // Move vector register (unpredicated)
+ void mov(const ZRegister& zd, const ZRegister& zn);
+
+ // Move indexed element to vector elements (unpredicated)
+ void mov(const ZRegister& zd, const ZRegister& zn, unsigned index);
+
+ // Move general-purpose register to vector elements (predicated)
+ void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
+
+ // Move SIMD&FP scalar register to vector elements (predicated)
+ void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
+
+ // Move vector elements (predicated)
+ void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Move signed integer immediate to vector elements (predicated)
+ void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
+
+ // Move signed immediate to vector elements (unpredicated).
+ void mov(const ZRegister& zd, int imm8, int shift);
+
+ // Move logical bitmask immediate to vector (unpredicated).
+ void mov(const ZRegister& zd, uint64_t imm);
+
+ // Move predicate (unpredicated), setting the condition flags
+ void movs(const PRegister& pd, const PRegister& pn);
+
+ // Move predicates (zeroing), setting the condition flags
+ void movs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Move prefix (predicated).
+ void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
+
+ // Move prefix (unpredicated).
+ void movprfx(const ZRegister& zd, const ZRegister& zn);
+
+ // Multiply-subtract vectors (predicated), writing multiplicand
+ // [Zdn = Za - Zdn * Zm].
+ void msb(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za);
+
+ // Multiply vectors (predicated).
+ void mul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Multiply by immediate (unpredicated).
+ void mul(const ZRegister& zd, const ZRegister& zn, int imm8);
+
+ // Bitwise NAND predicates.
+ void nand(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise NAND predicates, setting the condition flags.
+ void nands(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Negate (predicated).
+ void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Bitwise NOR predicates.
+ void nor(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise NOR predicates, setting the condition flags.
+ void nors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise invert vector (predicated).
+ void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Bitwise OR inverted predicate.
+ void orn(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise OR inverted predicate, setting the condition flags.
+ void orns(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise OR with inverted immediate (unpredicated).
+ void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
+
+ // Bitwise OR predicate.
+ void orr(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise OR vectors (predicated).
+ void orr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Bitwise OR with immediate (unpredicated).
+ void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
+
+ // Bitwise OR vectors (unpredicated).
+ void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Bitwise OR predicate, setting the condition flags.
+ void orrs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Bitwise OR reduction to scalar.
+ void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Set all predicate elements to false.
+ void pfalse(const PRegisterWithLaneSize& pd);
+
+ // Set the first active predicate element to true.
+ void pfirst(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Find next active predicate.
+ void pnext(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn);
+
+ // Prefetch bytes.
+ void prfb(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Prefetch halfwords.
+ void prfh(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Prefetch words.
+ void prfw(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Prefetch doublewords.
+ void prfd(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Set condition flags for predicate.
+ void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn);
+
+ // Initialise predicate from named constraint.
+ void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
+
+ // Initialise predicate from named constraint, setting the condition flags.
+ void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
+
+ // Unpack and widen half of predicate.
+ void punpkhi(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn);
+
+ // Unpack and widen half of predicate.
+ void punpklo(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn);
+
+ // Reverse bits (predicated).
+ void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Read the first-fault register.
+ void rdffr(const PRegisterWithLaneSize& pd);
+
+ // Return predicate of successfully loaded elements.
+ void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
+
+ // Return predicate of successfully loaded elements, setting the condition
+ // flags.
+ void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
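  // Sketch of the first-fault flow these instructions support, using setffr()
  // and ldff1d() declared elsewhere in this class (register helpers assumed):
  //
  //   masm.setffr();                                           // All FFR lanes true.
  //   masm.ldff1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0));  // May stop early.
  //   masm.rdffr(p1.VnB(), p0.Zeroing());                      // p1 = lanes loaded safely.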
+
+ // Read multiple of vector register size to scalar register.
+ void rdvl(const Register& xd, int imm6);
+
+ // Reverse all elements in a predicate.
+ void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn);
+
+ // Reverse all elements in a vector (unpredicated).
+ void rev(const ZRegister& zd, const ZRegister& zn);
+
+ // Reverse bytes / halfwords / words within elements (predicated).
+ void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Reverse bytes / halfwords / words within elements (predicated).
+ void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Reverse bytes / halfwords / words within elements (predicated).
+ void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Signed absolute difference (predicated).
+ void sabd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed add reduction to scalar.
+ void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
+
+ // Signed integer convert to floating-point (predicated).
+ void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Signed divide (predicated).
+ void sdiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed reversed divide (predicated).
+ void sdivr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed dot product by indexed quadtuplet.
+ void sdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed dot product.
+ void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Conditionally select elements from two predicates.
+ void sel(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Conditionally select elements from two vectors.
+ void sel(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Initialise the first-fault register to all true.
+ void setffr();
+
+ // Signed maximum vectors (predicated).
+ void smax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed maximum with immediate (unpredicated).
+ void smax(const ZRegister& zd, const ZRegister& zn, int imm8);
+
+ // Signed maximum reduction to scalar.
+ void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Signed minimum vectors (predicated).
+ void smin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed minimum with immediate (unpredicated).
+ void smin(const ZRegister& zd, const ZRegister& zn, int imm8);
+
+ // Signed minimum reduction to scalar.
+ void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Signed multiply returning high half (predicated).
+ void smulh(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Splice two vectors under predicate control.
+ void splice(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed saturating add vectors (unpredicated).
+ void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating add immediate (unpredicated).
+ void sqadd(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift = -1);
+
+ // Signed saturating decrement scalar by multiple of 8-bit predicate
+ // constraint element count.
+ void sqdecb(const Register& xd,
+ const Register& wn,
+ int pattern,
+ int multiplier);
+
+ // Signed saturating decrement scalar by multiple of 8-bit predicate
+ // constraint element count.
+ void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating decrement scalar by multiple of 64-bit predicate
+ // constraint element count.
+ void sqdecd(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1);
+
+ // Signed saturating decrement scalar by multiple of 64-bit predicate
+ // constraint element count.
+ void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating decrement vector by multiple of 64-bit predicate
+ // constraint element count.
+ void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating decrement scalar by multiple of 16-bit predicate
+ // constraint element count.
+ void sqdech(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1);
+
+ // Signed saturating decrement scalar by multiple of 16-bit predicate
+ // constraint element count.
+ void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating decrement vector by multiple of 16-bit predicate
+ // constraint element count.
+ void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating decrement scalar by active predicate element count.
+ void sqdecp(const Register& xd,
+ const PRegisterWithLaneSize& pg,
+ const Register& wn);
+
+ // Signed saturating decrement scalar by active predicate element count.
+ void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg);
+
+ // Signed saturating decrement vector by active predicate element count.
+ void sqdecp(const ZRegister& zdn, const PRegister& pg);
+
+ // Signed saturating decrement scalar by multiple of 32-bit predicate
+ // constraint element count.
+ void sqdecw(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1);
+
+ // Signed saturating decrement scalar by multiple of 32-bit predicate
+ // constraint element count.
+ void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating decrement vector by multiple of 32-bit predicate
+ // constraint element count.
+ void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating increment scalar by multiple of 8-bit predicate
+ // constraint element count.
+ void sqincb(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1);
+
+ // Signed saturating increment scalar by multiple of 8-bit predicate
+ // constraint element count.
+ void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating increment scalar by multiple of 64-bit predicate
+ // constraint element count.
+ void sqincd(const Register& xd,
+ const Register& wn,
+ int pattern,
+ int multiplier);
+
+ // Signed saturating increment scalar by multiple of 64-bit predicate
+ // constraint element count.
+ void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating increment vector by multiple of 64-bit predicate
+ // constraint element count.
+ void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating increment scalar by multiple of 16-bit predicate
+ // constraint element count.
+ void sqinch(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1);
+
+ // Signed saturating increment scalar by multiple of 16-bit predicate
+ // constraint element count.
+ void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating increment vector by multiple of 16-bit predicate
+ // constraint element count.
+ void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating increment scalar by active predicate element count.
+ void sqincp(const Register& xd,
+ const PRegisterWithLaneSize& pg,
+ const Register& wn);
+
+ // Signed saturating increment scalar by active predicate element count.
+ void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg);
+
+ // Signed saturating increment vector by active predicate element count.
+ void sqincp(const ZRegister& zdn, const PRegister& pg);
+
+ // Signed saturating increment scalar by multiple of 32-bit predicate
+ // constraint element count.
+ void sqincw(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1);
+
+ // Signed saturating increment scalar by multiple of 32-bit predicate
+ // constraint element count.
+ void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating increment vector by multiple of 32-bit predicate
+ // constraint element count.
+ void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Signed saturating subtract vectors (unpredicated).
+ void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating subtract immediate (unpredicated).
+ void sqsub(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift = -1);
+
+ // Contiguous/scatter store bytes from vector.
+ void st1b(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous/scatter store halfwords from vector.
+ void st1h(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous/scatter store words from vector.
+ void st1w(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous/scatter store doublewords from vector.
+ void st1d(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store two-byte structures from two vectors.
+ void st2b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store two-halfword structures from two vectors.
+ void st2h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store two-word structures from two vectors.
+ void st2w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store two-doubleword structures from two vectors.
+ void st2d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store three-byte structures from three vectors.
+ void st3b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store three-halfword structures from three vectors.
+ void st3h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store three-word structures from three vectors.
+ void st3w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store three-doubleword structures from three vectors.
+ void st3d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store four-byte structures from four vectors.
+ void st4b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store four-halfword structures from four vectors.
+ void st4h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store four-word structures from four vectors.
+ void st4w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store four-doubleword structures from four vectors.
+ void st4d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store non-temporal bytes from vector.
+ void stnt1b(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store non-temporal halfwords from vector.
+ void stnt1h(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store non-temporal words from vector.
+ void stnt1w(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous store non-temporal doublewords from vector.
+ void stnt1d(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Store SVE predicate/vector register.
+ void str(const CPURegister& rt, const SVEMemOperand& addr);
+
+ // Subtract vectors (predicated).
+ void sub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Subtract vectors (unpredicated).
+ void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Subtract immediate (unpredicated).
+ void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
+
+ // Reversed subtract vectors (predicated).
+ void subr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Reversed subtract from immediate (unpredicated).
+ void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
+
+ // Signed unpack and extend half of vector.
+ void sunpkhi(const ZRegister& zd, const ZRegister& zn);
+
+ // Signed unpack and extend half of vector.
+ void sunpklo(const ZRegister& zd, const ZRegister& zn);
+
+ // Signed byte extend (predicated).
+ void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Signed halfword extend (predicated).
+ void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Signed word extend (predicated).
+ void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Programmable table lookup/permute using vector of indices into a
+ // vector.
+ void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Interleave even or odd elements from two predicates.
+ void trn1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Interleave even or odd elements from two vectors.
+ void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Interleave even or odd elements from two predicates.
+ void trn2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Interleave even or odd elements from two vectors.
+ void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned absolute difference (predicated).
+ void uabd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned add reduction to scalar.
+ void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
+
+ // Unsigned integer convert to floating-point (predicated).
+ void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Unsigned divide (predicated).
+ void udiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned reversed divide (predicated).
+ void udivr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned dot product by indexed quadtuplet.
+ void udot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Unsigned dot product.
+ void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned maximum vectors (predicated).
+ void umax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned maximum with immediate (unpredicated).
+ void umax(const ZRegister& zd, const ZRegister& zn, int imm8);
+
+ // Unsigned maximum reduction to scalar.
+ void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Unsigned minimum vectors (predicated).
+ void umin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned minimum with immediate (unpredicated).
+ void umin(const ZRegister& zd, const ZRegister& zn, int imm8);
+
+ // Unsigned minimum reduction to scalar.
+ void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
+
+ // Unsigned multiply returning high half (predicated).
+ void umulh(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned saturating add vectors (unpredicated).
+ void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned saturating add immediate (unpredicated).
+ void uqadd(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift = -1);
+
+ // Unsigned saturating decrement scalar by multiple of 8-bit predicate
+ // constraint element count.
+ void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating decrement scalar by multiple of 64-bit predicate
+ // constraint element count.
+ void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating decrement vector by multiple of 64-bit predicate
+ // constraint element count.
+ void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating decrement scalar by multiple of 16-bit predicate
+ // constraint element count.
+ void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating decrement vector by multiple of 16-bit predicate
+ // constraint element count.
+ void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating decrement scalar by active predicate element count.
+ void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg);
+
+ // Unsigned saturating decrement vector by active predicate element count.
+ void uqdecp(const ZRegister& zdn, const PRegister& pg);
+
+ // Unsigned saturating decrement scalar by multiple of 32-bit predicate
+ // constraint element count.
+ void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating decrement vector by multiple of 32-bit predicate
+ // constraint element count.
+ void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating increment scalar by multiple of 8-bit predicate
+ // constraint element count.
+ void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating increment scalar by multiple of 64-bit predicate
+ // constraint element count.
+ void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating increment vector by multiple of 64-bit predicate
+ // constraint element count.
+ void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating increment scalar by multiple of 16-bit predicate
+ // constraint element count.
+ void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating increment vector by multiple of 16-bit predicate
+ // constraint element count.
+ void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating increment scalar by active predicate element count.
+ void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg);
+
+ // Unsigned saturating increment vector by active predicate element count.
+ void uqincp(const ZRegister& zdn, const PRegister& pg);
+
+ // Unsigned saturating increment scalar by multiple of 32-bit predicate
+ // constraint element count.
+ void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating increment vector by multiple of 32-bit predicate
+ // constraint element count.
+ void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
+
+ // Unsigned saturating subtract vectors (unpredicated).
+ void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned saturating subtract immediate (unpredicated).
+ void uqsub(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift = -1);
+
+ // Unsigned unpack and extend half of vector.
+ void uunpkhi(const ZRegister& zd, const ZRegister& zn);
+
+ // Unsigned unpack and extend half of vector.
+ void uunpklo(const ZRegister& zd, const ZRegister& zn);
+
+ // Unsigned byte extend (predicated).
+ void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Unsigned halfword extend (predicated).
+ void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Unsigned word extend (predicated).
+ void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Concatenate even or odd elements from two predicates.
+ void uzp1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Concatenate even or odd elements from two vectors.
+ void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Concatenate even or odd elements from two predicates.
+ void uzp2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Concatenate even or odd elements from two vectors.
+ void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // While incrementing signed scalar less than or equal to scalar.
+ void whilele(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm);
+
+ // While incrementing unsigned scalar lower than scalar.
+ void whilelo(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm);
+
+ // While incrementing unsigned scalar lower or same as scalar.
+ void whilels(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm);
+
+ // While incrementing signed scalar less than scalar.
+ void whilelt(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm);
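  // These while* forms usually drive a strip-mined loop together with the
  // predicated loads/stores above; an abbreviated sketch (labels and the
  // branch back omitted, register helpers and SVEMemOperand constructors
  // assumed):
  //
  //   masm.whilelt(p0.VnS(), x1, x2);  // Active lanes while x1 + lane < x2.
  //   masm.ld1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2));
  //   masm.st1w(z0.VnS(), p0, SVEMemOperand(x3, x1, LSL, 2));
  //   masm.incw(x1);                   // Advance the element index by one vector.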
+
+ // Write the first-fault register.
+ void wrffr(const PRegisterWithLaneSize& pn);
+
+ // Interleave elements from two half predicates.
+ void zip1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Interleave elements from two half vectors.
+ void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Interleave elements from two half predicates.
+ void zip2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm);
+
+ // Interleave elements from two half vectors.
+ void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
// Emit generic instructions.
// Emit raw instructions into the instruction stream.
@@ -3650,20 +5852,20 @@ class Assembler : public vixl::internal::AssemblerBase {
// Code generation helpers.
// Register encoding.
- static Instr Rd(CPURegister rd) {
- VIXL_ASSERT(rd.GetCode() != kSPRegInternalCode);
- return rd.GetCode() << Rd_offset;
+ template <int hibit, int lobit>
+ static Instr Rx(CPURegister rx) {
+ VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
+ return ImmUnsignedField<hibit, lobit>(rx.GetCode());
}
- static Instr Rn(CPURegister rn) {
- VIXL_ASSERT(rn.GetCode() != kSPRegInternalCode);
- return rn.GetCode() << Rn_offset;
- }
-
- static Instr Rm(CPURegister rm) {
- VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
- return rm.GetCode() << Rm_offset;
+#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
+#define REGISTER_ENCODER(N) \
+ static Instr R##N(CPURegister r##N) { \
+ return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
}
+ CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
+#undef REGISTER_ENCODER
+#undef CPU_REGISTER_FIELD_NAMES
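+
+  // For reference, the macro above expands to one encoder per field name,
+  // equivalent to the individual definitions it replaces, e.g.:
+  //   static Instr Rd(CPURegister rd) {
+  //     return Rx<Rd_offset + Rd_width - 1, Rd_offset>(rd);
+  //   }
+  // so Rd(x3) places register code 3 into bits <Rd_offset + 4 : Rd_offset>.
+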
static Instr RmNot31(CPURegister rm) {
VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
@@ -3671,26 +5873,6 @@ class Assembler : public vixl::internal::AssemblerBase {
return Rm(rm);
}
- static Instr Ra(CPURegister ra) {
- VIXL_ASSERT(ra.GetCode() != kSPRegInternalCode);
- return ra.GetCode() << Ra_offset;
- }
-
- static Instr Rt(CPURegister rt) {
- VIXL_ASSERT(rt.GetCode() != kSPRegInternalCode);
- return rt.GetCode() << Rt_offset;
- }
-
- static Instr Rt2(CPURegister rt2) {
- VIXL_ASSERT(rt2.GetCode() != kSPRegInternalCode);
- return rt2.GetCode() << Rt2_offset;
- }
-
- static Instr Rs(CPURegister rs) {
- VIXL_ASSERT(rs.GetCode() != kSPRegInternalCode);
- return rs.GetCode() << Rs_offset;
- }
-
// These encoding functions allow the stack pointer to be encoded, and
// disallow the zero register.
static Instr RdSP(Register rd) {
@@ -3708,6 +5890,33 @@ class Assembler : public vixl::internal::AssemblerBase {
return (rm.GetCode() & kRegCodeMask) << Rm_offset;
}
+ static Instr Pd(PRegister pd) {
+ return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd);
+ }
+
+ static Instr Pm(PRegister pm) {
+ return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm);
+ }
+
+ static Instr Pn(PRegister pn) {
+ return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn);
+ }
+
+ static Instr PgLow8(PRegister pg) {
+ // Governing predicates can be merging, zeroing, or unqualified. They should
+ // never have a lane size.
+ VIXL_ASSERT(!pg.HasLaneSize());
+ return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg);
+ }
+
+ template <int hibit, int lobit>
+ static Instr Pg(PRegister pg) {
+ // Governing predicates can be merging, zeroing, or unqualified. They should
+ // never have a lane size.
+ VIXL_ASSERT(!pg.HasLaneSize());
+ return Rx<hibit, lobit>(pg);
+ }
+
// Flags encoding.
static Instr Flags(FlagsUpdate S) {
if (S == SetFlags) {
@@ -3721,6 +5930,26 @@ class Assembler : public vixl::internal::AssemblerBase {
static Instr Cond(Condition cond) { return cond << Condition_offset; }
+ // Generic immediate encoding.
+ template <int hibit, int lobit>
+ static Instr ImmField(int64_t imm) {
+ VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
+ VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
+ int fieldsize = hibit - lobit + 1;
+ VIXL_ASSERT(IsIntN(fieldsize, imm));
+ return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
+ }
+
+ // For unsigned immediate encoding.
+  // TODO: Handle signed and unsigned immediates in a satisfactory way.
+ template <int hibit, int lobit>
+ static Instr ImmUnsignedField(uint64_t imm) {
+ VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
+ VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
+ VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
+ return static_cast<Instr>(imm << lobit);
+ }
+
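+  // For example, ImmField<20, 16>(-3) encodes a 5-bit signed field:
+  // TruncateToUintN(5, -3) is 0b11101, shifted left by 16. ImmUnsignedField
+  // instead requires the value to fit the field as an unsigned integer, so
+  // ImmUnsignedField<19, 16>(7) is simply 7 << 16.
+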
// PC-relative address encoding.
static Instr ImmPCRelAddress(int64_t imm21) {
VIXL_ASSERT(IsInt21(imm21));
@@ -3771,11 +6000,60 @@ class Assembler : public vixl::internal::AssemblerBase {
if (IsUint12(imm)) { // No shift required.
imm <<= ImmAddSub_offset;
} else {
- imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
+ imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
}
return imm;
}
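+
+  // For example, 0x5000 does not fit in 12 bits but is a multiple of 0x1000,
+  // so it is encoded as imm12 = 0x5 with the shift bit set; the value is
+  // assumed to have been validated beforehand (e.g. with IsImmAddSub()).
+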
+ static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) {
+ VIXL_ASSERT(IsUint6(imms));
+ VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3));
+ USE(lane_size);
+ return imms << SVEImmSetBits_offset;
+ }
+
+ static Instr SVEImmRotate(unsigned immr, unsigned lane_size) {
+ VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr));
+ USE(lane_size);
+ return immr << SVEImmRotate_offset;
+ }
+
+ static Instr SVEBitN(unsigned bitn) {
+ VIXL_ASSERT(IsUint1(bitn));
+ return bitn << SVEBitN_offset;
+ }
+
+ static Instr SVEDtype(unsigned msize_in_bytes_log2,
+ unsigned esize_in_bytes_log2,
+ bool is_signed,
+ int dtype_h_lsb = 23,
+ int dtype_l_lsb = 21) {
+ VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2);
+ VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2);
+ Instr dtype_h = msize_in_bytes_log2;
+ Instr dtype_l = esize_in_bytes_log2;
+ // Signed forms use the encodings where msize would be greater than esize.
+ if (is_signed) {
+ dtype_h = dtype_h ^ 0x3;
+ dtype_l = dtype_l ^ 0x3;
+ }
+ VIXL_ASSERT(IsUint2(dtype_h));
+ VIXL_ASSERT(IsUint2(dtype_l));
+ VIXL_ASSERT((dtype_h > dtype_l) == is_signed);
+
+ return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb);
+ }
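+
+  // Worked example: a signed byte load into word elements (e.g. ld1sb to .S)
+  // has msize_in_bytes_log2 = 0 and esize_in_bytes_log2 = 2, so the signed
+  // form encodes dtype_h = 0 ^ 3 = 3 and dtype_l = 2 ^ 3 = 1, satisfying
+  // dtype_h > dtype_l. The unsigned ld1b equivalent keeps dtype_h = 0 and
+  // dtype_l = 2.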
+
+ static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2,
+ unsigned esize_in_bytes_log2,
+ bool is_signed) {
+ return SVEDtype(msize_in_bytes_log2,
+ esize_in_bytes_log2,
+ is_signed,
+ 23,
+ 13);
+ }
+
static Instr ImmS(unsigned imms, unsigned reg_size) {
VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
((reg_size == kWRegSize) && IsUint5(imms)));
@@ -3856,9 +6134,9 @@ class Assembler : public vixl::internal::AssemblerBase {
return TruncateToUint9(imm9) << ImmLS_offset;
}
- static Instr ImmLSPair(int64_t imm7, unsigned access_size) {
- VIXL_ASSERT(IsMultiple(imm7, 1 << access_size));
- int64_t scaled_imm7 = imm7 / (1 << access_size);
+ static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
+ VIXL_ASSERT(IsMultiple(imm7, 1 << access_size_in_bytes_log2));
+ int64_t scaled_imm7 = imm7 / (1 << access_size_in_bytes_log2);
VIXL_ASSERT(IsInt7(scaled_imm7));
return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
}
@@ -3990,8 +6268,8 @@ class Assembler : public vixl::internal::AssemblerBase {
unsigned* n = NULL,
unsigned* imm_s = NULL,
unsigned* imm_r = NULL);
- static bool IsImmLSPair(int64_t offset, unsigned access_size);
- static bool IsImmLSScaled(int64_t offset, unsigned access_size);
+ static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2);
+ static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2);
static bool IsImmLSUnscaled(int64_t offset);
static bool IsImmMovn(uint64_t imm, unsigned reg_size);
static bool IsImmMovz(uint64_t imm, unsigned reg_size);
@@ -4126,6 +6404,30 @@ class Assembler : public vixl::internal::AssemblerBase {
}
}
+ template <typename T>
+ static Instr SVESize(const T& rd) {
+ VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister());
+ VIXL_ASSERT(rd.HasLaneSize());
+ switch (rd.GetLaneSizeInBytes()) {
+ case 1:
+ return SVE_B;
+ case 2:
+ return SVE_H;
+ case 4:
+ return SVE_S;
+ case 8:
+ return SVE_D;
+ default:
+ return 0xffffffff;
+ }
+ }
+
+ static Instr ImmSVEPredicateConstraint(int pattern) {
+ VIXL_ASSERT(IsUint5(pattern));
+ return (pattern << ImmSVEPredicateConstraint_offset) &
+ ImmSVEPredicateConstraint_mask;
+ }
+
static Instr ImmNEONHLM(int index, int num_bits) {
int h, l, m;
if (num_bits == 3) {
@@ -4277,9 +6579,93 @@ class Assembler : public vixl::internal::AssemblerBase {
const MemOperand& addr,
Instr op);
+  // Set `is_load` to false by default, as it is only used in the
+  // scalar-plus-vector form.
+ Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
+ int num_regs,
+ const SVEMemOperand& addr,
+ bool is_load = false);
+
+ // E.g. st1b, st1h, ...
+ // This supports both contiguous and scatter stores.
+ void SVESt1Helper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // E.g. ld1b, ld1h, ...
+ // This supports both contiguous and gather loads.
+ void SVELd1Helper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr,
+ bool is_signed);
+
+ // E.g. ld1rb, ld1rh, ...
+ void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr,
+ bool is_signed);
+
+ // E.g. ldff1b, ldff1h, ...
+ // This supports both contiguous and gather loads.
+ void SVELdff1Helper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr,
+ bool is_signed);
+
+ // Common code for the helpers above.
+ void SVELdSt1Helper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ bool is_signed,
+ Instr op);
+
+ // Common code for the helpers above.
+ void SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ bool is_load,
+ bool is_signed,
+ bool is_first_fault);
+
+ // E.g. st2b, st3h, ...
+ void SVESt234Helper(int num_regs,
+ const ZRegister& zt1,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // E.g. ld2b, ld3h, ...
+ void SVELd234Helper(int num_regs,
+ const ZRegister& zt1,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Common code for the helpers above.
+ void SVELdSt234Helper(int num_regs,
+ const ZRegister& zt1,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ Instr op);
+
+  // E.g. ld1rqb, ld1rqh, ldnt1b, ...
+ void SVELd1St1ScaImmHelper(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ Instr regoffset_op,
+ Instr immoffset_op,
+ int imm_divisor = 1);
+
void Prefetch(PrefetchOperation op,
const MemOperand& addr,
LoadStoreScalingOption option = PreferScaledOffset);
+ void Prefetch(int op,
+ const MemOperand& addr,
+ LoadStoreScalingOption option = PreferScaledOffset);
// TODO(all): The third parameter should be passed by reference but gcc 4.8.2
// reports a bogus uninitialised warning then.
@@ -4287,6 +6673,9 @@ class Assembler : public vixl::internal::AssemblerBase {
const Register& rn,
const Operand operand,
LogicalOp op);
+
+ void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op);
+
void LogicalImmediate(const Register& rd,
const Register& rn,
unsigned n,
@@ -4306,6 +6695,92 @@ class Assembler : public vixl::internal::AssemblerBase {
FlagsUpdate S,
AddSubWithCarryOp op);
+ void CompareVectors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ SVEIntCompareVectorsOp op);
+
+ void CompareVectors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm,
+ SVEIntCompareSignedImmOp op);
+
+ void CompareVectors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm,
+ SVEIntCompareUnsignedImmOp op);
+
+ void SVEIntAddSubtractImmUnpredicatedHelper(
+ SVEIntAddSubtractImm_UnpredicatedOp op,
+ const ZRegister& zd,
+ int imm8,
+ int shift);
+
+ void SVEElementCountToRegisterHelper(Instr op,
+ const Register& rd,
+ int pattern,
+ int multiplier);
+
+ Instr EncodeSVEShiftImmediate(Shift shift_op,
+ int shift,
+ int lane_size_in_bits);
+
+ void SVEBitwiseShiftImmediate(const ZRegister& zd,
+ const ZRegister& zn,
+ Instr encoded_imm,
+ SVEBitwiseShiftUnpredicatedOp op);
+
+ void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
+ const PRegisterM& pg,
+ Instr encoded_imm,
+ SVEBitwiseShiftByImm_PredicatedOp op);
+
+ Instr SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2,
+ const ZRegister& zm,
+ int index,
+ Instr op_h,
+ Instr op_s,
+ Instr op_d);
+
+
+ void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ int prefetch_size);
+
+ void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ int prefetch_size);
+
+ void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ int prefetch_size);
+
+ void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ int prefetch_size);
+
+ void SVEPrefetchHelper(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ int prefetch_size);
+
+ static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) {
+ // SVE only supports PLD and PST, not PLI.
+ VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) ||
+ ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM)));
+ // Check that we can simply map bits.
+ VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000);
+ VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000);
+ // Remaining operations map directly.
+ return ((prfop & 0b10000) >> 1) | (prfop & 0b00111);
+ }
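+
+  // For example, PSTL2STRM (0b10011) maps to (0b10000 >> 1) | 0b011, i.e.
+  // 0b1011: bit 3 of the SVE prfop field selects store prefetches, and bits
+  // 2:0 carry the cache level and streaming hint through unchanged.
+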
// Functions for emulating operands not directly supported by the instruction
// set.
@@ -4507,12 +6982,16 @@ class Assembler : public vixl::internal::AssemblerBase {
NEONShiftImmediateOp op);
void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
+ // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8)
+ // and *shift is either 0 or 8. Otherwise, leave the values unchanged.
+ void ResolveSVEImm8Shift(int* imm8, int* shift);
+
Instr LoadStoreStructAddrModeField(const MemOperand& addr);
// Encode the specified MemOperand for the specified access size and scaling
// preference.
Instr LoadStoreMemOperand(const MemOperand& addr,
- unsigned access_size,
+ unsigned access_size_in_bytes_log2,
LoadStoreScalingOption option);
// Link the current (not-yet-emitted) instruction to the specified label, then
diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc
new file mode 100644
index 00000000..f7cf8b21
--- /dev/null
+++ b/src/aarch64/assembler-sve-aarch64.cc
@@ -0,0 +1,6489 @@
+// Copyright 2019, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "assembler-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+void Assembler::ResolveSVEImm8Shift(int* imm8, int* shift) {
+ if (*shift < 0) {
+ VIXL_ASSERT(*shift == -1);
+ // Derive the shift amount from the immediate.
+ if (IsInt8(*imm8)) {
+ *shift = 0;
+ } else if ((*imm8 % 256) == 0) {
+ *imm8 /= 256;
+ *shift = 8;
+ }
+ }
+
+ VIXL_ASSERT(IsInt8(*imm8));
+ VIXL_ASSERT((*shift == 0) || (*shift == 8));
+}
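+
+// For example, a caller passing imm8 = 0x4500 with shift = -1 gets back
+// imm8 = 0x45 and shift = 8, since 0x4500 is a multiple of 256; a value that
+// already fits in int8_t keeps shift = 0.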
+
+// SVEAddressGeneration.
+
+void Assembler::adr(const ZRegister& zd, const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(addr.IsVectorPlusVector());
+ VIXL_ASSERT(
+ AreSameLaneSize(zd, addr.GetVectorBase(), addr.GetVectorOffset()));
+
+ int lane_size = zd.GetLaneSizeInBits();
+ VIXL_ASSERT((lane_size == kSRegSize) || (lane_size == kDRegSize));
+
+ int shift_amount = addr.GetShiftAmount();
+ VIXL_ASSERT((shift_amount >= 0) && (shift_amount <= 3));
+
+ Instr op = 0xffffffff;
+ Instr msz = shift_amount << 10;
+ SVEOffsetModifier mod = addr.GetOffsetModifier();
+ switch (mod) {
+ case SVE_UXTW:
+ VIXL_ASSERT(lane_size == kDRegSize);
+ op = ADR_z_az_d_u32_scaled;
+ break;
+ case SVE_SXTW:
+ VIXL_ASSERT(lane_size == kDRegSize);
+ op = ADR_z_az_d_s32_scaled;
+ break;
+ case SVE_LSL:
+ case NO_SVE_OFFSET_MODIFIER:
+ op = (lane_size == kSRegSize) ? ADR_z_az_s_same_scaled
+ : ADR_z_az_d_same_scaled;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ Emit(op | msz | Rd(zd) | Rn(addr.GetVectorBase()) |
+ Rm(addr.GetVectorOffset()));
+}
+
+void Assembler::SVELogicalImmediate(const ZRegister& zdn,
+ uint64_t imm,
+ Instr op) {
+ unsigned bit_n, imm_s, imm_r;
+ unsigned lane_size = zdn.GetLaneSizeInBits();
+ // Check that the immediate can be encoded in the instruction.
+ if (IsImmLogical(imm, lane_size, &bit_n, &imm_s, &imm_r)) {
+ Emit(op | Rd(zdn) | SVEBitN(bit_n) | SVEImmRotate(imm_r, lane_size) |
+ SVEImmSetBits(imm_s, lane_size));
+ } else {
+ VIXL_UNREACHABLE();
+ }
+}
+
+void Assembler::and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ SVELogicalImmediate(zd, imm, AND_z_zi);
+}
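+
+// A minimal usage sketch, assuming an Assembler set up with CPUFeatures::kSVE
+// and the standard register definitions; the function name is illustrative.
+void UsageSketchSVELogicalImmediate(Assembler* assm) {
+  // 0x00ff00ff00ff00ff repeats every 16 bits, so IsImmLogical() accepts it
+  // for .D lanes; the immediate form requires zd to alias zn.
+  assm->and_(z0.VnD(), z0.VnD(), 0x00ff00ff00ff00ffULL);
+}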
+
+void Assembler::dupm(const ZRegister& zd, uint64_t imm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ // DUPM_z_i is an SVEBroadcastBitmaskImmOp, but its encoding and constraints
+// are similar enough to SVEBitwiseLogicalWithImm_UnpredicatedOp that we can
+ // use the logical immediate encoder to get the correct behaviour.
+ SVELogicalImmediate(zd, imm, DUPM_z_i);
+}
+
+void Assembler::eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ SVELogicalImmediate(zd, imm, EOR_z_zi);
+}
+
+void Assembler::orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ SVELogicalImmediate(zd, imm, ORR_z_zi);
+}
+
+// SVEBitwiseLogicalUnpredicated.
+void Assembler::and_(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.IsLaneSizeD());
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+ Emit(AND_z_zz | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::bic(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.IsLaneSizeD());
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+ Emit(BIC_z_zz | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::eor(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.IsLaneSizeD());
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+ Emit(EOR_z_zz | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::orr(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.IsLaneSizeD());
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+ Emit(ORR_z_zz | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// SVEBitwiseShiftPredicated.
+
+void Assembler::SVEBitwiseShiftImmediatePred(
+ const ZRegister& zdn,
+ const PRegisterM& pg,
+ Instr encoded_imm_and_tsz,
+ SVEBitwiseShiftByImm_PredicatedOp op) {
+ Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz)
+ << 5;
+ Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22;
+ Emit(op | tszh | tszl_and_imm | PgLow8(pg) | Rd(zdn));
+}
+
+void Assembler::asr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
+ // 0000 0100 ..00 0000 100. .... .... ....
+ // tszh<23:22> | opc<19:18> = 00 | L<17> = 0 | U<16> = 0 | Pg<12:10> |
+ // tszl<9:8> | imm3<7:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ Instr encoded_imm =
+ EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASR_z_p_zi);
+}
+
+void Assembler::asr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D
+ // 0000 0100 ..01 1000 100. .... .... ....
+ // size<23:22> | R<18> = 0 | L<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) ||
+ ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) &&
+ (zd.GetLaneSizeInBytes() != kDRegSizeInBytes)));
+ Instr op = ASR_z_p_zw;
+ if (AreSameLaneSize(zd, zn, zm)) {
+ op = ASR_z_p_zz;
+ }
+ Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::asrd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ // ASRD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
+ // 0000 0100 ..00 0100 100. .... .... ....
+ // tszh<23:22> | opc<19:18> = 01 | L<17> = 0 | U<16> = 0 | Pg<12:10> |
+ // tszl<9:8> | imm3<7:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+
+ Instr encoded_imm =
+ EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASRD_z_p_zi);
+}
+
+void Assembler::asrr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 0100 100. .... .... ....
+ // size<23:22> | R<18> = 1 | L<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(ASRR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::lsl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
+ // 0000 0100 ..00 0011 100. .... .... ....
+ // tszh<23:22> | opc<19:18> = 00 | L<17> = 1 | U<16> = 1 | Pg<12:10> |
+ // tszl<9:8> | imm3<7:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+
+ Instr encoded_imm =
+ EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSL_z_p_zi);
+}
+
+void Assembler::lsl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D
+ // 0000 0100 ..01 1011 100. .... .... ....
+ // size<23:22> | R<18> = 0 | L<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) ||
+ ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) &&
+ (zd.GetLaneSizeInBytes() != kDRegSizeInBytes)));
+ Instr op = LSL_z_p_zw;
+ if (AreSameLaneSize(zd, zn, zm)) {
+ op = LSL_z_p_zz;
+ }
+ Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::lslr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 0111 100. .... .... ....
+ // size<23:22> | R<18> = 1 | L<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(LSLR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::lsr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
+ // 0000 0100 ..00 0001 100. .... .... ....
+ // tszh<23:22> | opc<19:18> = 00 | L<17> = 0 | U<16> = 1 | Pg<12:10> |
+ // tszl<9:8> | imm3<7:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+
+ Instr encoded_imm =
+ EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSR_z_p_zi);
+}
+
+void Assembler::lsr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D
+ // 0000 0100 ..01 1001 100. .... .... ....
+ // size<23:22> | R<18> = 0 | L<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) ||
+ ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) &&
+ (zd.GetLaneSizeInBytes() != kDRegSizeInBytes)));
+ Instr op = LSR_z_p_zw;
+ if (AreSameLaneSize(zd, zn, zm)) {
+ op = LSR_z_p_zz;
+ }
+ Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::lsrr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 0101 100. .... .... ....
+ // size<23:22> | R<18> = 1 | L<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(LSRR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+// SVEBitwiseShiftUnpredicated.
+
+Instr Assembler::EncodeSVEShiftImmediate(Shift shift_op,
+ int shift,
+ int lane_size_in_bits) {
+ if (shift_op == LSL) {
+ VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits));
+ return lane_size_in_bits + shift;
+ }
+
+ VIXL_ASSERT((shift_op == ASR) || (shift_op == LSR));
+ VIXL_ASSERT((shift > 0) && (shift <= lane_size_in_bits));
+ return (2 * lane_size_in_bits) - shift;
+}
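+
+// For example, with .S lanes (32 bits): LSL #3 encodes as 32 + 3 = 35, while
+// LSR #3 encodes as (2 * 32) - 3 = 61 = 0b0111101, which the shift-immediate
+// emitters split into tszh = 0b01 and tszl:imm3 = 0b11101.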
+
+void Assembler::SVEBitwiseShiftImmediate(const ZRegister& zd,
+ const ZRegister& zn,
+ Instr encoded_imm_and_tsz,
+ SVEBitwiseShiftUnpredicatedOp op) {
+ Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz)
+ << 16;
+ Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22;
+ Emit(op | tszh | tszl_and_imm | Rd(zd) | Rn(zn));
+}
+
+void Assembler::asr(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ Instr encoded_imm =
+ EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, encoded_imm, ASR_z_zi);
+}
+
+void Assembler::asr(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes);
+
+ Emit(ASR_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::lsl(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ Instr encoded_imm =
+ EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSL_z_zi);
+}
+
+void Assembler::lsl(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes);
+
+ Emit(LSL_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::lsr(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ Instr encoded_imm =
+ EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSR_z_zi);
+}
+
+void Assembler::lsr(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes);
+
+ Emit(LSR_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// SVEElementCount.
+
+#define VIXL_SVE_INC_DEC_LIST(V) \
+ V(cntb, CNTB_r_s) \
+ V(cnth, CNTH_r_s) \
+ V(cntw, CNTW_r_s) \
+ V(cntd, CNTD_r_s) \
+ V(decb, DECB_r_rs) \
+ V(dech, DECH_r_rs) \
+ V(decw, DECW_r_rs) \
+ V(decd, DECD_r_rs) \
+ V(incb, INCB_r_rs) \
+ V(inch, INCH_r_rs) \
+ V(incw, INCW_r_rs) \
+ V(incd, INCD_r_rs) \
+ V(sqdecb, SQDECB_r_rs_x) \
+ V(sqdech, SQDECH_r_rs_x) \
+ V(sqdecw, SQDECW_r_rs_x) \
+ V(sqdecd, SQDECD_r_rs_x) \
+ V(sqincb, SQINCB_r_rs_x) \
+ V(sqinch, SQINCH_r_rs_x) \
+ V(sqincw, SQINCW_r_rs_x) \
+ V(sqincd, SQINCD_r_rs_x)
+
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
+ void Assembler::FN(const Register& rdn, int pattern, int multiplier) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ VIXL_ASSERT(rdn.IsX()); \
+ Emit(OP | Rd(rdn) | ImmSVEPredicateConstraint(pattern) | \
+ ImmUnsignedField<19, 16>(multiplier - 1)); \
+ }
+VIXL_SVE_INC_DEC_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
+
+#define VIXL_SVE_UQINC_UQDEC_LIST(V) \
+ V(uqdecb, (rdn.IsX() ? UQDECB_r_rs_x : UQDECB_r_rs_uw)) \
+ V(uqdech, (rdn.IsX() ? UQDECH_r_rs_x : UQDECH_r_rs_uw)) \
+ V(uqdecw, (rdn.IsX() ? UQDECW_r_rs_x : UQDECW_r_rs_uw)) \
+ V(uqdecd, (rdn.IsX() ? UQDECD_r_rs_x : UQDECD_r_rs_uw)) \
+ V(uqincb, (rdn.IsX() ? UQINCB_r_rs_x : UQINCB_r_rs_uw)) \
+ V(uqinch, (rdn.IsX() ? UQINCH_r_rs_x : UQINCH_r_rs_uw)) \
+ V(uqincw, (rdn.IsX() ? UQINCW_r_rs_x : UQINCW_r_rs_uw)) \
+ V(uqincd, (rdn.IsX() ? UQINCD_r_rs_x : UQINCD_r_rs_uw))
+
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
+ void Assembler::FN(const Register& rdn, int pattern, int multiplier) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ Emit(OP | Rd(rdn) | ImmSVEPredicateConstraint(pattern) | \
+ ImmUnsignedField<19, 16>(multiplier - 1)); \
+ }
+VIXL_SVE_UQINC_UQDEC_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
+
+#define VIXL_SVE_SQX_INC_DEC_LIST(V) \
+ V(sqdecb, SQDECB) \
+ V(sqdech, SQDECH) \
+ V(sqdecw, SQDECW) \
+ V(sqdecd, SQDECD) \
+ V(sqincb, SQINCB) \
+ V(sqinch, SQINCH) \
+ V(sqincw, SQINCW) \
+ V(sqincd, SQINCD)
+
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
+ void Assembler::FN(const Register& xd, \
+ const Register& wn, \
+ int pattern, \
+ int multiplier) { \
+ USE(wn); \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ VIXL_ASSERT(wn.IsW() && xd.Is(wn.X())); \
+ Emit(OP##_r_rs_sx | Rd(xd) | ImmSVEPredicateConstraint(pattern) | \
+ ImmUnsignedField<19, 16>(multiplier - 1)); \
+ }
+VIXL_SVE_SQX_INC_DEC_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
+
+#define VIXL_SVE_INC_DEC_VEC_LIST(V) \
+ V(dech, DEC, H) \
+ V(decw, DEC, W) \
+ V(decd, DEC, D) \
+ V(inch, INC, H) \
+ V(incw, INC, W) \
+ V(incd, INC, D) \
+ V(sqdech, SQDEC, H) \
+ V(sqdecw, SQDEC, W) \
+ V(sqdecd, SQDEC, D) \
+ V(sqinch, SQINC, H) \
+ V(sqincw, SQINC, W) \
+ V(sqincd, SQINC, D) \
+ V(uqdech, UQDEC, H) \
+ V(uqdecw, UQDEC, W) \
+ V(uqdecd, UQDEC, D) \
+ V(uqinch, UQINC, H) \
+ V(uqincw, UQINC, W) \
+ V(uqincd, UQINC, D)
+
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, T) \
+ void Assembler::FN(const ZRegister& zdn, int pattern, int multiplier) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ VIXL_ASSERT(zdn.GetLaneSizeInBytes() == k##T##RegSizeInBytes); \
+ Emit(OP##T##_z_zs | Rd(zdn) | ImmSVEPredicateConstraint(pattern) | \
+ ImmUnsignedField<19, 16>(multiplier - 1)); \
+ }
+VIXL_SVE_INC_DEC_VEC_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
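+
+// A minimal usage sketch for the element-count helpers generated above,
+// assuming an Assembler set up with CPUFeatures::kSVE; the function name is
+// illustrative.
+void UsageSketchSVEElementCount(Assembler* assm) {
+  // Add twice the number of .S elements selected by the "all" pattern to x0.
+  assm->incw(x0, SVE_ALL, 2);
+  // Signed saturating increment of each .H lane, using a fixed VL-8 pattern.
+  assm->sqinch(z0.VnH(), SVE_VL8, 1);
+}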
+
+// SVEFPAccumulatingReduction.
+
+void Assembler::fadda(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm) {
+ // FADDA <V><dn>, <Pg>, <V><dn>, <Zm>.<T>
+ // 0110 0101 ..01 1000 001. .... .... ....
+ // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Vdn<4:0>
+
+ USE(vn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.Is(vn));
+ VIXL_ASSERT(vd.IsScalar());
+ VIXL_ASSERT(zm.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(AreSameLaneSize(zm, vd));
+
+ Emit(FADDA_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm));
+}
+
+// SVEFPArithmetic_Predicated.
+
+void Assembler::fabd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 1000 100. .... .... ....
+ // size<23:22> | opc<19:16> = 1000 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const>
+ // 0110 0101 ..01 1000 100. ..00 00.. ....
+ // size<23:22> | opc<18:16> = 000 | Pg<12:10> | i1<5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT((imm == 0.5) || (imm == 1.0));
+
+ Instr i1 = (imm == 1.0) ? (1 << 5) : 0;
+ Emit(FADD_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1);
+}
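+
+// Note that the immediate form above can only encode 0.5 or 1.0 (a single i1
+// bit), e.g. fadd(z1.VnD(), p0.Merging(), z1.VnD(), 1.0); other constants
+// must first be materialised in a vector (for example with a broadcast such
+// as fdup) and combined using the register form below.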
+
+void Assembler::fadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 0000 100. .... .... ....
+ // size<23:22> | opc<19:16> = 0000 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FADD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fdiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 1101 100. .... .... ....
+ // size<23:22> | opc<19:16> = 1101 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fdivr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 1100 100. .... .... ....
+ // size<23:22> | opc<19:16> = 1100 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fmax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const>
+ // 0110 0101 ..01 1110 100. ..00 00.. ....
+ // size<23:22> | opc<18:16> = 110 | Pg<12:10> | i1<5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0));
+
+ Instr i1 = (imm == 1.0) ? (1 << 5) : 0;
+ Emit(FMAX_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1);
+}
+
+void Assembler::fmax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 0110 100. .... .... ....
+ // size<23:22> | opc<19:16> = 0110 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fmaxnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const>
+ // 0110 0101 ..01 1100 100. ..00 00.. ....
+ // size<23:22> | opc<18:16> = 100 | Pg<12:10> | i1<5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0));
+
+ Instr i1 = (imm == 1.0) ? (1 << 5) : 0;
+ Emit(FMAXNM_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1);
+}
+
+void Assembler::fmaxnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 0100 100. .... .... ....
+ // size<23:22> | opc<19:16> = 0100 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FMAXNM_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fmin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const>
+ // 0110 0101 ..01 1111 100. ..00 00.. ....
+ // size<23:22> | opc<18:16> = 111 | Pg<12:10> | i1<5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0));
+
+ Instr i1 = (imm == 1.0) ? (1 << 5) : 0;
+ Emit(FMIN_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1);
+}
+
+void Assembler::fmin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 0111 100. .... .... ....
+ // size<23:22> | opc<19:16> = 0111 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fminnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const>
+ // 0110 0101 ..01 1101 100. ..00 00.. ....
+ // size<23:22> | opc<18:16> = 101 | Pg<12:10> | i1<5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0));
+
+ Instr i1 = (imm == 1.0) ? (1 << 5) : 0;
+ Emit(FMINNM_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1);
+}
+
+void Assembler::fminnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 0101 100. .... .... ....
+ // size<23:22> | opc<19:16> = 0101 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FMINNM_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fmul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const>
+ // 0110 0101 ..01 1010 100. ..00 00.. ....
+ // size<23:22> | opc<18:16> = 010 | Pg<12:10> | i1<5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT((imm == 0.5) || (imm == 2.0));
+
+ Instr i1 = (imm == 2.0) ? (1 << 5) : 0;
+ Emit(FMUL_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1);
+}
+
+void Assembler::fmul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 0010 100. .... .... ....
+ // size<23:22> | opc<19:16> = 0010 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FMUL_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fmulx(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 1010 100. .... .... ....
+ // size<23:22> | opc<19:16> = 1010 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FMULX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fscale(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 1001 100. .... .... ....
+ // size<23:22> | opc<19:16> = 1001 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FSCALE_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const>
+ // 0110 0101 ..01 1001 100. ..00 00.. ....
+ // size<23:22> | opc<18:16> = 001 | Pg<12:10> | i1<5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT((imm == 0.5) || (imm == 1.0));
+
+ Instr i1 = (imm == 1.0) ? (1 << 5) : 0;
+ Emit(FSUB_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1);
+}
+
+void Assembler::fsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 0001 100. .... .... ....
+ // size<23:22> | opc<19:16> = 0001 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FSUB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const>
+ // 0110 0101 ..01 1011 100. ..00 00.. ....
+ // size<23:22> | opc<18:16> = 011 | Pg<12:10> | i1<5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT((imm == 0.5) || (imm == 1.0));
+
+ Instr i1 = (imm == 1.0) ? (1 << 5) : 0;
+ Emit(FSUBR_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1);
+}
+
+void Assembler::fsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0101 ..00 0011 100. .... .... ....
+ // size<23:22> | opc<19:16> = 0011 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FSUBR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::ftmad(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int imm3) {
+ // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm>
+ // 0110 0101 ..01 0... 1000 00.. .... ....
+ // size<23:22> | imm3<18:16> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FTMAD_z_zzi | SVESize(zd) | Rd(zd) | Rn(zm) |
+ ImmUnsignedField<18, 16>(imm3));
+}
+
+// SVEFPArithmeticUnpredicated.
+
+void Assembler::fadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 0000 00.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fmul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 0000 10.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 010 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FMUL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::frecps(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 0001 10.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 110 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRECPS_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::frsqrts(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 0001 11.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 111 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRSQRTS_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fsub(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 0000 01.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::ftsmul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 0000 11.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 011 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FTSMUL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// SVEFPCompareVectors.
+
+void Assembler::facge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 110. .... ...1 ....
+ // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> |
+ // o3<4> = 1 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FACGE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::facgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 111. .... ...1 ....
+ // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 1 | Pg<12:10> | Zn<9:5> |
+ // o3<4> = 1 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FACGT_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fcmeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 011. .... ...0 ....
+ // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> |
+ // o3<4> = 0 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FCMEQ_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fcmge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 010. .... ...0 ....
+ // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> |
+ // o3<4> = 0 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FCMGE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fcmgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 010. .... ...1 ....
+ // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> |
+ // o3<4> = 1 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FCMGT_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fcmne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 011. .... ...1 ....
+ // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> |
+ // o3<4> = 1 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FCMNE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fcmuo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..0. .... 110. .... ...0 ....
+ // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> |
+ // o3<4> = 0 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FCMUO_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+// SVEFPCompareWithZero.
+
+void Assembler::fcmeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0
+ // 0110 0101 ..01 0010 001. .... ...0 ....
+ // size<23:22> | eq<17> = 1 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(zero == 0.0);
+ USE(zero);
+
+ Emit(FCMEQ_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fcmge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0
+ // 0110 0101 ..01 0000 001. .... ...0 ....
+ // size<23:22> | eq<17> = 0 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(zero == 0.0);
+ USE(zero);
+
+ Emit(FCMGE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fcmgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0
+ // 0110 0101 ..01 0000 001. .... ...1 ....
+ // size<23:22> | eq<17> = 0 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 1 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(zero == 0.0);
+ USE(zero);
+
+ Emit(FCMGT_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fcmle(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0
+ // 0110 0101 ..01 0001 001. .... ...1 ....
+ // size<23:22> | eq<17> = 0 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 1 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(zero == 0.0);
+ USE(zero);
+
+ Emit(FCMLE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fcmlt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0
+ // 0110 0101 ..01 0001 001. .... ...0 ....
+ // size<23:22> | eq<17> = 0 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(zero == 0.0);
+ USE(zero);
+
+ Emit(FCMLT_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fcmne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0
+ // 0110 0101 ..01 0011 001. .... ...0 ....
+ // size<23:22> | eq<17> = 1 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(zero == 0.0);
+ USE(zero);
+
+ Emit(FCMNE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn));
+}
+
+// SVEFPComplexAddition.
+
+void Assembler::fcadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const>
+ // 0110 0100 ..00 000. 100. .... .... ....
+ // size<23:22> | rot<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT((rot == 90) || (rot == 270));
+
+ Instr rotate_bit = (rot == 90) ? 0 : (1 << 16);
+ Emit(FCADD_z_p_zz | rotate_bit | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+// SVEFPComplexMulAdd.
+
+void Assembler::fcmla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const>
+ // 0110 0100 ..0. .... 0... .... .... ....
+ // size<23:22> | Zm<20:16> | rot<14:13> | Pg<12:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
+
+ Instr rotate_bit = (rot / 90) << 13;
+ Emit(FCMLA_z_p_zzz | rotate_bit | SVESize(zda) | Rd(zda) | PgLow8(pg) |
+ Rn(zn) | Rm(zm));
+}
+
+// SVEFPComplexMulAddIndex.
+
+void Assembler::fcmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
+ VIXL_ASSERT(index >= 0);
+
+ int lane_size = zda.GetLaneSizeInBytes();
+
+ Instr zm_and_idx = 0;
+ Instr op = FCMLA_z_zzzi_h;
+ if (lane_size == kHRegSizeInBytes) {
+ // Zm<18:16> | i2<20:19>
+ VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 3));
+ zm_and_idx = (index << 19) | Rx<18, 16>(zm);
+ } else {
+ // Zm<19:16> | i1<20>
+ VIXL_ASSERT(lane_size == kSRegSizeInBytes);
+ VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 1));
+ zm_and_idx = (index << 20) | Rx<19, 16>(zm);
+ op = FCMLA_z_zzzi_s;
+ }
+
+ Instr rotate_bit = (rot / 90) << 10;
+ Emit(op | zm_and_idx | rotate_bit | Rd(zda) | Rn(zn));
+}
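+
+// For illustration only (an assumed Assembler instance `assm` and the usual
+// vixl lane helpers): the asserts above limit an S-lane indexed FCMLA to
+// Zm in z0-z15 and index 0 or 1, so a maximal example is:
+//   assm.fcmla(z0.VnS(), z1.VnS(), z15.VnS(), 1, 90);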
+
+// SVEFPFastReduction.
+
+void Assembler::faddv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ // FADDV <V><d>, <Pg>, <Zn>.<T>
+ // 0110 0101 ..00 0000 001. .... .... ....
+ // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Vd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(AreSameLaneSize(zn, vd));
+
+ Emit(FADDV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fmaxnmv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ // FMAXNMV <V><d>, <Pg>, <Zn>.<T>
+ // 0110 0101 ..00 0100 001. .... .... ....
+ // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Vd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(AreSameLaneSize(zn, vd));
+
+ Emit(FMAXNMV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fmaxv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ // FMAXV <V><d>, <Pg>, <Zn>.<T>
+ // 0110 0101 ..00 0110 001. .... .... ....
+ // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Vd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(AreSameLaneSize(zn, vd));
+
+ Emit(FMAXV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fminnmv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ // FMINNMV <V><d>, <Pg>, <Zn>.<T>
+ // 0110 0101 ..00 0101 001. .... .... ....
+ // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Vd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(AreSameLaneSize(zn, vd));
+
+ Emit(FMINNMV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fminv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ // FMINV <V><d>, <Pg>, <Zn>.<T>
+ // 0110 0101 ..00 0111 001. .... .... ....
+ // size<23:22> | opc<18:16> = 111 | Pg<12:10> | Zn<9:5> | Vd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(AreSameLaneSize(zn, vd));
+
+ Emit(FMINV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+// SVEFPMulAdd.
+
+void Assembler::fmad(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za) {
+ // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
+ // 0110 0101 ..1. .... 100. .... .... ....
+ // size<23:22> | Za<20:16> | opc<14:13> = 00 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zdn, zm, za));
+ VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FMAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za));
+}
+
+void Assembler::fmla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..1. .... 000. .... .... ....
+ // size<23:22> | Zm<20:16> | opc<14:13> = 00 | Pg<12:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FMLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fmls(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..1. .... 001. .... .... ....
+ // size<23:22> | Zm<20:16> | opc<14:13> = 01 | Pg<12:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FMLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fmsb(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za) {
+ // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
+ // 0110 0101 ..1. .... 101. .... .... ....
+ // size<23:22> | Za<20:16> | opc<14:13> = 01 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zdn, zm, za));
+ VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za));
+}
+
+void Assembler::fnmad(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za) {
+ // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
+ // 0110 0101 ..1. .... 110. .... .... ....
+ // size<23:22> | Za<20:16> | opc<14:13> = 10 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zdn, zm, za));
+ VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FNMAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za));
+}
+
+void Assembler::fnmla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..1. .... 010. .... .... ....
+ // size<23:22> | Zm<20:16> | opc<14:13> = 10 | Pg<12:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FNMLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fnmls(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
+ // 0110 0101 ..1. .... 011. .... .... ....
+ // size<23:22> | Zm<20:16> | opc<14:13> = 11 | Pg<12:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FNMLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fnmsb(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za) {
+ // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
+ // 0110 0101 ..1. .... 111. .... .... ....
+ // size<23:22> | Za<20:16> | opc<14:13> = 11 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zdn, zm, za));
+ VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FNMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za));
+}
+
+Instr Assembler::SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2,
+ const ZRegister& zm,
+ int index,
+ Instr op_h,
+ Instr op_s,
+ Instr op_d) {
+ Instr size = lane_size_in_bytes_log2 << SVESize_offset;
+ Instr zm_with_index = Rm(zm);
+ Instr op = 0xffffffff;
+  // The allowable register number and lane index depend on the lane size.
+ switch (lane_size_in_bytes_log2) {
+ case kHRegSizeInBytesLog2:
+ VIXL_ASSERT(zm.GetCode() <= 7);
+ VIXL_ASSERT(IsUint3(index));
+ // For H-sized lanes, size is encoded as 0b0x, where x is used as the top
+ // bit of the index. So, if index is less than four, the top bit of index
+ // is zero, and therefore size is 0b00. Otherwise, it's 0b01, the usual
+ // encoding for H-sized lanes.
+ if (index < 4) size = 0;
+ // Top two bits of "zm" encode the index.
+ zm_with_index |= (index & 3) << (Rm_offset + 3);
+ op = op_h;
+ break;
+ case kSRegSizeInBytesLog2:
+ VIXL_ASSERT(zm.GetCode() <= 7);
+ VIXL_ASSERT(IsUint2(index));
+ // Top two bits of "zm" encode the index.
+ zm_with_index |= (index & 3) << (Rm_offset + 3);
+ op = op_s;
+ break;
+ case kDRegSizeInBytesLog2:
+ VIXL_ASSERT(zm.GetCode() <= 15);
+ VIXL_ASSERT(IsUint1(index));
+ // Top bit of "zm" encodes the index.
+ zm_with_index |= (index & 1) << (Rm_offset + 4);
+ op = op_d;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ return op | zm_with_index | size;
+}
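+
+// Worked example (a sketch of the packing above, not generated code): for
+// H-sized lanes with zm = z3 and index = 5 (0b101), the top index bit leaves
+// size<23:22> at 0b01, the low two index bits go to bits <20:19>, and the
+// register number occupies bits <18:16>, so bits <20:16> become 0b01011.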
+
+// SVEFPMulAddIndex.
+
+void Assembler::fmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+  // The encodings of opcode, index, Zm, and size are synthesized in this
+  // variable.
+ Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ FMLA_z_zzzi_h,
+ FMLA_z_zzzi_s,
+ FMLA_z_zzzi_d);
+
+ Emit(synthesized_op | Rd(zda) | Rn(zn));
+}
+
+void Assembler::fmls(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+  // The encodings of opcode, index, Zm, and size are synthesized in this
+  // variable.
+ Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ FMLS_z_zzzi_h,
+ FMLS_z_zzzi_s,
+ FMLS_z_zzzi_d);
+
+ Emit(synthesized_op | Rd(zda) | Rn(zn));
+}
+
+// SVEFPMulIndex.
+
+// This prototype maps to 3 instruction encodings:
+// FMUL_z_zzi_d
+// FMUL_z_zzi_h
+// FMUL_z_zzi_s
+void Assembler::fmul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ unsigned index) {
+ // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T>[<imm>]
+ // 0110 0100 ..1. .... 0010 00.. .... ....
+ // size<23:22> | opc<20:16> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+  // The encodings of opcode, index, Zm, and size are synthesized in this
+  // variable.
+ Instr synthesized_op = SVEFPMulIndexHelper(zd.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ FMUL_z_zzi_h,
+ FMUL_z_zzi_s,
+ FMUL_z_zzi_d);
+
+ Emit(synthesized_op | Rd(zd) | Rn(zn));
+}
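+
+// For example (illustrative, assuming an Assembler instance `assm`):
+//   assm.fmul(z0.VnH(), z1.VnH(), z7.VnH(), 5);
+// multiplies each H lane of z1 by the indexed element of z7 and routes
+// through SVEFPMulIndexHelper with FMUL_z_zzi_h.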
+
+// SVEFPUnaryOpPredicated.
+
+void Assembler::fcvt(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Instr op = 0xffffffff;
+ switch (zn.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kSRegSizeInBytes:
+ op = FCVT_z_p_z_h2s;
+ break;
+ case kDRegSizeInBytes:
+ op = FCVT_z_p_z_h2d;
+ break;
+ }
+ break;
+ case kSRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ op = FCVT_z_p_z_s2h;
+ break;
+ case kDRegSizeInBytes:
+ op = FCVT_z_p_z_s2d;
+ break;
+ }
+ break;
+ case kDRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ op = FCVT_z_p_z_d2h;
+ break;
+ case kSRegSizeInBytes:
+ op = FCVT_z_p_z_d2s;
+ break;
+ }
+ break;
+ }
+ VIXL_ASSERT(op != 0xffffffff);
+
+ Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
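+
+// For example (illustrative, assuming an Assembler instance `assm`):
+//   assm.fcvt(z0.VnD(), p0.Merging(), z1.VnS());
+// selects FCVT_z_p_z_s2d via the switch above, widening active S lanes to D.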
+
+void Assembler::fcvtzs(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ Instr op = 0xffffffff;
+ switch (zn.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ op = FCVTZS_z_p_z_fp162h;
+ break;
+ case kSRegSizeInBytes:
+ op = FCVTZS_z_p_z_fp162w;
+ break;
+ case kDRegSizeInBytes:
+ op = FCVTZS_z_p_z_fp162x;
+ break;
+ }
+ break;
+ case kSRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kSRegSizeInBytes:
+ op = FCVTZS_z_p_z_s2w;
+ break;
+ case kDRegSizeInBytes:
+ op = FCVTZS_z_p_z_s2x;
+ break;
+ }
+ break;
+ case kDRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kSRegSizeInBytes:
+ op = FCVTZS_z_p_z_d2w;
+ break;
+ case kDRegSizeInBytes:
+ op = FCVTZS_z_p_z_d2x;
+ break;
+ }
+ break;
+ }
+ VIXL_ASSERT(op != 0xffffffff);
+
+ Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fcvtzu(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ Instr op = 0xffffffff;
+ switch (zn.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ op = FCVTZU_z_p_z_fp162h;
+ break;
+ case kSRegSizeInBytes:
+ op = FCVTZU_z_p_z_fp162w;
+ break;
+ case kDRegSizeInBytes:
+ op = FCVTZU_z_p_z_fp162x;
+ break;
+ }
+ break;
+ case kSRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kSRegSizeInBytes:
+ op = FCVTZU_z_p_z_s2w;
+ break;
+ case kDRegSizeInBytes:
+ op = FCVTZU_z_p_z_s2x;
+ break;
+ }
+ break;
+ case kDRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kSRegSizeInBytes:
+ op = FCVTZU_z_p_z_d2w;
+ break;
+ case kDRegSizeInBytes:
+ op = FCVTZU_z_p_z_d2x;
+ break;
+ }
+ break;
+ }
+ VIXL_ASSERT(op != 0xffffffff);
+
+ Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::frecpx(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0110 0101 ..00 1100 101. .... .... ....
+ // size<23:22> | opc<17:16> = 00 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRECPX_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::frinta(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRINTA_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::frinti(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRINTI_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::frintm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRINTM_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::frintn(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRINTN_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::frintp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRINTP_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::frintx(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRINTX_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::frintz(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRINTZ_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fsqrt(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // FSQRT <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0110 0101 ..00 1101 101. .... .... ....
+ // size<23:22> | opc<17:16> = 01 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FSQRT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::scvtf(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ Instr op = 0xffffffff;
+ switch (zn.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ op = SCVTF_z_p_z_h2fp16;
+ break;
+ }
+ break;
+ case kSRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ op = SCVTF_z_p_z_w2fp16;
+ break;
+ case kSRegSizeInBytes:
+ op = SCVTF_z_p_z_w2s;
+ break;
+ case kDRegSizeInBytes:
+ op = SCVTF_z_p_z_w2d;
+ break;
+ }
+ break;
+ case kDRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ op = SCVTF_z_p_z_x2fp16;
+ break;
+ case kSRegSizeInBytes:
+ op = SCVTF_z_p_z_x2s;
+ break;
+ case kDRegSizeInBytes:
+ op = SCVTF_z_p_z_x2d;
+ break;
+ }
+ break;
+ }
+ VIXL_ASSERT(op != 0xffffffff);
+
+ Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::ucvtf(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ Instr op = 0xffffffff;
+ switch (zn.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ op = UCVTF_z_p_z_h2fp16;
+ break;
+ }
+ break;
+ case kSRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ op = UCVTF_z_p_z_w2fp16;
+ break;
+ case kSRegSizeInBytes:
+ op = UCVTF_z_p_z_w2s;
+ break;
+ case kDRegSizeInBytes:
+ op = UCVTF_z_p_z_w2d;
+ break;
+ }
+ break;
+ case kDRegSizeInBytes:
+ switch (zd.GetLaneSizeInBytes()) {
+ case kHRegSizeInBytes:
+ op = UCVTF_z_p_z_x2fp16;
+ break;
+ case kSRegSizeInBytes:
+ op = UCVTF_z_p_z_x2s;
+ break;
+ case kDRegSizeInBytes:
+ op = UCVTF_z_p_z_x2d;
+ break;
+ }
+ break;
+ }
+ VIXL_ASSERT(op != 0xffffffff);
+
+ Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+// SVEFPUnaryOpUnpredicated.
+
+void Assembler::frecpe(const ZRegister& zd, const ZRegister& zn) {
+ // FRECPE <Zd>.<T>, <Zn>.<T>
+ // 0110 0101 ..00 1110 0011 00.. .... ....
+ // size<23:22> | opc<18:16> = 110 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRECPE_z_z | SVESize(zd) | Rd(zd) | Rn(zn));
+}
+
+void Assembler::frsqrte(const ZRegister& zd, const ZRegister& zn) {
+ // FRSQRTE <Zd>.<T>, <Zn>.<T>
+ // 0110 0101 ..00 1111 0011 00.. .... ....
+ // size<23:22> | opc<18:16> = 111 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FRSQRTE_z_z | SVESize(zd) | Rd(zd) | Rn(zn));
+}
+
+// SVEIncDecByPredicateCount.
+
+void Assembler::decp(const Register& rdn, const PRegisterWithLaneSize& pg) {
+ // DECP <Xdn>, <Pg>.<T>
+ // 0010 0101 ..10 1101 1000 100. .... ....
+ // size<23:22> | op<17> = 0 | D<16> = 1 | opc2<10:9> = 00 | Pg<8:5> |
+ // Rdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(rdn.IsX());
+
+ Emit(DECP_r_p_r | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg));
+}
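+
+// Illustrative use (assuming an Assembler instance `assm`): DECP decrements
+// the scalar by the number of active lanes of the given size, e.g.:
+//   assm.decp(x0, p0.VnB());  // x0 -= count of active byte lanes in p0.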
+
+void Assembler::decp(const ZRegister& zdn, const PRegister& pg) {
+ // DECP <Zdn>.<T>, <Pg>
+ // 0010 0101 ..10 1101 1000 000. .... ....
+ // size<23:22> | op<17> = 0 | D<16> = 1 | opc2<10:9> = 00 | Pg<8:5> |
+ // Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(pg.IsUnqualified());
+
+ Emit(DECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg));
+}
+
+void Assembler::incp(const Register& rdn, const PRegisterWithLaneSize& pg) {
+ // INCP <Xdn>, <Pg>.<T>
+ // 0010 0101 ..10 1100 1000 100. .... ....
+ // size<23:22> | op<17> = 0 | D<16> = 0 | opc2<10:9> = 00 | Pg<8:5> |
+ // Rdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(rdn.IsX());
+
+ Emit(INCP_r_p_r | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg));
+}
+
+void Assembler::incp(const ZRegister& zdn, const PRegister& pg) {
+ // INCP <Zdn>.<T>, <Pg>
+ // 0010 0101 ..10 1100 1000 000. .... ....
+ // size<23:22> | op<17> = 0 | D<16> = 0 | opc2<10:9> = 00 | Pg<8:5> |
+ // Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(pg.IsUnqualified());
+
+ Emit(INCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg));
+}
+
+void Assembler::sqdecp(const Register& xd,
+ const PRegisterWithLaneSize& pg,
+ const Register& wn) {
+ // SQDECP <Xdn>, <Pg>.<T>, <Wdn>
+ // 0010 0101 ..10 1010 1000 100. .... ....
+ // size<23:22> | D<17> = 1 | U<16> = 0 | sf<10> = 0 | op<9> = 0 | Pg<8:5> |
+ // Rdn<4:0>
+
+ USE(wn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(xd.IsX() && wn.IsW() && xd.Aliases(wn));
+
+ Emit(SQDECP_r_p_r_sx | SVESize(pg) | Rd(xd) | Rx<8, 5>(pg));
+}
+
+void Assembler::sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg) {
+ // SQDECP <Xdn>, <Pg>.<T>
+ // 0010 0101 ..10 1010 1000 110. .... ....
+ // size<23:22> | D<17> = 1 | U<16> = 0 | sf<10> = 1 | op<9> = 0 | Pg<8:5> |
+ // Rdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(xdn.IsX());
+
+ Emit(SQDECP_r_p_r_x | SVESize(pg) | Rd(xdn) | Rx<8, 5>(pg));
+}
+
+void Assembler::sqdecp(const ZRegister& zdn, const PRegister& pg) {
+ // SQDECP <Zdn>.<T>, <Pg>
+ // 0010 0101 ..10 1010 1000 000. .... ....
+ // size<23:22> | D<17> = 1 | U<16> = 0 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(pg.IsUnqualified());
+
+ Emit(SQDECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg));
+}
+
+void Assembler::sqincp(const Register& xd,
+ const PRegisterWithLaneSize& pg,
+ const Register& wn) {
+ // SQINCP <Xdn>, <Pg>.<T>, <Wdn>
+ // 0010 0101 ..10 1000 1000 100. .... ....
+ // size<23:22> | D<17> = 0 | U<16> = 0 | sf<10> = 0 | op<9> = 0 | Pg<8:5> |
+ // Rdn<4:0>
+
+ USE(wn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(xd.IsX() && wn.IsW() && xd.Aliases(wn));
+
+ Emit(SQINCP_r_p_r_sx | SVESize(pg) | Rd(xd) | Rx<8, 5>(pg));
+}
+
+void Assembler::sqincp(const Register& xdn, const PRegisterWithLaneSize& pg) {
+ // SQINCP <Xdn>, <Pg>.<T>
+ // 0010 0101 ..10 1000 1000 110. .... ....
+ // size<23:22> | D<17> = 0 | U<16> = 0 | sf<10> = 1 | op<9> = 0 | Pg<8:5> |
+ // Rdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(xdn.IsX());
+
+ Emit(SQINCP_r_p_r_x | SVESize(pg) | Rd(xdn) | Rx<8, 5>(pg));
+}
+
+void Assembler::sqincp(const ZRegister& zdn, const PRegister& pg) {
+ // SQINCP <Zdn>.<T>, <Pg>
+ // 0010 0101 ..10 1000 1000 000. .... ....
+ // size<23:22> | D<17> = 0 | U<16> = 0 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(pg.IsUnqualified());
+
+ Emit(SQINCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg));
+}
+
+void Assembler::uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg) {
+ // UQDECP <Wdn>, <Pg>.<T>
+ // UQDECP <Xdn>, <Pg>.<T>
+  // 0010 0101 ..10 1011 1000 1.0. .... ....
+ // size<23:22> | D<17> = 1 | U<16> = 1 | sf<10> | op<9> = 0 | Pg<8:5> |
+ // Rdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Instr op = rdn.IsX() ? UQDECP_r_p_r_x : UQDECP_r_p_r_uw;
+ Emit(op | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg));
+}
+
+void Assembler::uqdecp(const ZRegister& zdn, const PRegister& pg) {
+ // UQDECP <Zdn>.<T>, <Pg>
+ // 0010 0101 ..10 1011 1000 000. .... ....
+ // size<23:22> | D<17> = 1 | U<16> = 1 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(pg.IsUnqualified());
+
+ Emit(UQDECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg));
+}
+
+void Assembler::uqincp(const Register& rdn, const PRegisterWithLaneSize& pg) {
+  // UQINCP <Wdn>, <Pg>.<T>
+  // UQINCP <Xdn>, <Pg>.<T>
+  // 0010 0101 ..10 1001 1000 1.0. .... ....
+  // size<23:22> | D<17> = 0 | U<16> = 1 | sf<10> | op<9> = 0 | Pg<8:5> |
+  // Rdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Instr op = rdn.IsX() ? UQINCP_r_p_r_x : UQINCP_r_p_r_uw;
+ Emit(op | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg));
+}
+
+void Assembler::uqincp(const ZRegister& zdn, const PRegister& pg) {
+ // UQINCP <Zdn>.<T>, <Pg>
+ // 0010 0101 ..10 1001 1000 000. .... ....
+ // size<23:22> | D<17> = 0 | U<16> = 1 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+ VIXL_ASSERT(pg.IsUnqualified());
+
+ Emit(UQINCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg));
+}
+
+// SVEIndexGeneration.
+
+void Assembler::index(const ZRegister& zd, int start, int step) {
+ // INDEX <Zd>.<T>, #<imm1>, #<imm2>
+ // 0000 0100 ..1. .... 0100 00.. .... ....
+ // size<23:22> | step<20:16> | start<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(INDEX_z_ii | SVESize(zd) | ImmField<20, 16>(step) |
+ ImmField<9, 5>(start) | Rd(zd));
+}
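+
+// Illustrative semantics (a sketch, not generated code): lane i of Zd is set
+// to start + i * step, so, assuming an Assembler instance `assm`,
+//   assm.index(z0.VnS(), 0, 1);
+// fills the S lanes of z0 with 0, 1, 2, 3, ...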
+
+void Assembler::index(const ZRegister& zd,
+ const Register& rn,
+ const Register& rm) {
+ // INDEX <Zd>.<T>, <R><n>, <R><m>
+ // 0000 0100 ..1. .... 0100 11.. .... ....
+ // size<23:22> | Rm<20:16> | Rn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >=
+ zd.GetLaneSizeInBits());
+ VIXL_ASSERT(static_cast<unsigned>(rm.GetSizeInBits()) >=
+ zd.GetLaneSizeInBits());
+
+ Emit(INDEX_z_rr | SVESize(zd) | Rd(zd) | Rn(rn) | Rm(rm));
+}
+
+void Assembler::index(const ZRegister& zd, const Register& rn, int imm5) {
+ // INDEX <Zd>.<T>, <R><n>, #<imm>
+ // 0000 0100 ..1. .... 0100 01.. .... ....
+ // size<23:22> | imm5<20:16> | Rn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >=
+ zd.GetLaneSizeInBits());
+
+ Emit(INDEX_z_ri | SVESize(zd) | Rd(zd) | Rn(rn) | ImmField<20, 16>(imm5));
+}
+
+void Assembler::index(const ZRegister& zd, int imm5, const Register& rm) {
+ // INDEX <Zd>.<T>, #<imm>, <R><m>
+ // 0000 0100 ..1. .... 0100 10.. .... ....
+ // size<23:22> | Rm<20:16> | imm5<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(static_cast<unsigned>(rm.GetSizeInBits()) >=
+ zd.GetLaneSizeInBits());
+
+ Emit(INDEX_z_ir | SVESize(zd) | Rd(zd) | ImmField<9, 5>(imm5) | Rm(rm));
+}
+
+// SVEIntArithmeticUnpredicated.
+
+void Assembler::add(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 0000 00.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(ADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sqadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 0001 00.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 100 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(SQADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sqsub(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 0001 10.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 110 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(SQSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sub(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 0000 01.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(SUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uqadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 0001 01.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 101 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(UQADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uqsub(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 0001 11.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 111 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(UQSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// SVEIntBinaryArithmeticPredicated.
+
+void Assembler::add(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..00 0000 000. .... .... ....
+ // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(ADD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::and_(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 1010 000. .... .... ....
+ // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(AND_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::bic(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 1011 000. .... .... ....
+ // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(BIC_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::eor(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 1001 000. .... .... ....
+ // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(EOR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::mul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // MUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 0000 000. .... .... ....
+ // size<23:22> | H<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(MUL_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::orr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 1000 000. .... .... ....
+ // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(ORR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sabd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..00 1100 000. .... .... ....
+ // size<23:22> | opc<18:17> = 10 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(SABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sdiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 0100 000. .... .... ....
+ // size<23:22> | R<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD());
+
+ Emit(SDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sdivr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 0110 000. .... .... ....
+ // size<23:22> | R<17> = 1 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD());
+
+ Emit(SDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::smax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..00 1000 000. .... .... ....
+ // size<23:22> | opc<18:17> = 00 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(SMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::smin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..00 1010 000. .... .... ....
+ // size<23:22> | opc<18:17> = 01 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(SMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::smulh(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMULH <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 0010 000. .... .... ....
+ // size<23:22> | H<17> = 1 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(SMULH_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..00 0001 000. .... .... ....
+ // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(SUB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::subr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..00 0011 000. .... .... ....
+ // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(SUBR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::uabd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..00 1101 000. .... .... ....
+ // size<23:22> | opc<18:17> = 10 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(UABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::udiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 0101 000. .... .... ....
+ // size<23:22> | R<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD());
+
+ Emit(UDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::udivr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 0111 000. .... .... ....
+ // size<23:22> | R<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD());
+
+ Emit(UDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::umax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..00 1001 000. .... .... ....
+ // size<23:22> | opc<18:17> = 00 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(UMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::umin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..00 1011 000. .... .... ....
+ // size<23:22> | opc<18:17> = 01 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(UMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::umulh(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMULH <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0100 ..01 0011 000. .... .... ....
+ // size<23:22> | H<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(UMULH_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+// SVEIntCompareScalars.
+
+void Assembler::ctermeq(const Register& rn, const Register& rm) {
+ // CTERMEQ <R><n>, <R><m>
+ // 0010 0101 1.1. .... 0010 00.. ...0 0000
+ // op<23> = 1 | sz<22> | Rm<20:16> | Rn<9:5> | ne<4> = 0
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameSizeAndType(rn, rm));
+ const Instr sz = rn.Is64Bits() ? 0x00400000 : 0x00000000;
+
+ Emit(CTERMEQ_rr | sz | Rn(rn) | Rm(rm));
+}
+
+void Assembler::ctermne(const Register& rn, const Register& rm) {
+ // CTERMNE <R><n>, <R><m>
+ // 0010 0101 1.1. .... 0010 00.. ...1 0000
+ // op<23> = 1 | sz<22> | Rm<20:16> | Rn<9:5> | ne<4> = 1
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameSizeAndType(rn, rm));
+ const Instr sz = rn.Is64Bits() ? 0x00400000 : 0x00000000;
+
+ Emit(CTERMNE_rr | sz | Rn(rn) | Rm(rm));
+}
+
+void Assembler::whilele(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ // WHILELE <Pd>.<T>, <R><n>, <R><m>
+ // 0010 0101 ..1. .... 000. 01.. ...1 ....
+ // size<23:22> | Rm<20:16> | sf<12> | U<11> = 0 | lt<10> = 1 | Rn<9:5> |
+ // eq<4> = 1 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameSizeAndType(rn, rm));
+ const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000;
+
+ Emit(WHILELE_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm));
+}
+
+void Assembler::whilelo(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ // WHILELO <Pd>.<T>, <R><n>, <R><m>
+ // 0010 0101 ..1. .... 000. 11.. ...0 ....
+ // size<23:22> | Rm<20:16> | sf<12> | U<11> = 1 | lt<10> = 1 | Rn<9:5> |
+ // eq<4> = 0 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameSizeAndType(rn, rm));
+ const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000;
+
+ Emit(WHILELO_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm));
+}
+
+void Assembler::whilels(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ // WHILELS <Pd>.<T>, <R><n>, <R><m>
+ // 0010 0101 ..1. .... 000. 11.. ...1 ....
+ // size<23:22> | Rm<20:16> | sf<12> | U<11> = 1 | lt<10> = 1 | Rn<9:5> |
+ // eq<4> = 1 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameSizeAndType(rn, rm));
+ const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000;
+
+ Emit(WHILELS_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm));
+}
+
+void Assembler::whilelt(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ // WHILELT <Pd>.<T>, <R><n>, <R><m>
+ // 0010 0101 ..1. .... 000. 01.. ...0 ....
+ // size<23:22> | Rm<20:16> | sf<12> | U<11> = 0 | lt<10> = 1 | Rn<9:5> |
+ // eq<4> = 0 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameSizeAndType(rn, rm));
+ const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000;
+
+ Emit(WHILELT_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm));
+}
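+
+// Typical use (illustrative, assuming an Assembler instance `assm`): the
+// WHILE forms build loop predicates from an induction variable and a limit:
+//   assm.whilelt(p0.VnS(), x0, x1);  // p0.s[i] is active while (x0 + i) < x1.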
+
+void Assembler::CompareVectors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ SVEIntCompareVectorsOp op) {
+ Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::CompareVectors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm,
+ SVEIntCompareSignedImmOp op) {
+ Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm));
+}
+
+void Assembler::CompareVectors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm,
+ SVEIntCompareUnsignedImmOp op) {
+ Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) |
+ ImmUnsignedField<20, 14>(imm));
+}
+
+void Assembler::cmp(Condition cond,
+ const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ switch (cond) {
+ case eq:
+ cmpeq(pd, pg, zn, zm);
+ break;
+ case ge:
+ cmpge(pd, pg, zn, zm);
+ break;
+ case gt:
+ cmpgt(pd, pg, zn, zm);
+ break;
+ case le:
+ cmple(pd, pg, zn, zm);
+ break;
+ case lt:
+ cmplt(pd, pg, zn, zm);
+ break;
+ case ne:
+ cmpne(pd, pg, zn, zm);
+ break;
+ case hi:
+ cmphi(pd, pg, zn, zm);
+ break;
+ case hs:
+ cmphs(pd, pg, zn, zm);
+ break;
+ case lo:
+ cmplo(pd, pg, zn, zm);
+ break;
+ case ls:
+ cmpls(pd, pg, zn, zm);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
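+
+// For example (illustrative), cmp(lt, pd, pg, zn, zm) emits exactly the same
+// instruction as a direct call to cmplt(pd, pg, zn, zm).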
+
+// SVEIntCompareSignedImm.
+
+void Assembler::cmpeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
+ // 0010 0101 ..0. .... 100. .... ...0 ....
+ // size<23:22> | imm5<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5>
+ // | ne<4> = 0 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+
+ CompareVectors(pd, pg, zn, imm5, CMPEQ_p_p_zi);
+}
+
+void Assembler::cmpge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
+ // 0010 0101 ..0. .... 000. .... ...0 ....
+ // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5>
+ // | ne<4> = 0 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+
+ CompareVectors(pd, pg, zn, imm5, CMPGE_p_p_zi);
+}
+
+void Assembler::cmpgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
+ // 0010 0101 ..0. .... 000. .... ...1 ....
+ // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5>
+ // | ne<4> = 1 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+
+ CompareVectors(pd, pg, zn, imm5, CMPGT_p_p_zi);
+}
+
+void Assembler::cmple(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
+ // 0010 0101 ..0. .... 001. .... ...1 ....
+ // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5>
+ // | ne<4> = 1 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+
+ CompareVectors(pd, pg, zn, imm5, CMPLE_p_p_zi);
+}
+
+void Assembler::cmplt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
+ // 0010 0101 ..0. .... 001. .... ...0 ....
+ // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5>
+ // | ne<4> = 0 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+
+ CompareVectors(pd, pg, zn, imm5, CMPLT_p_p_zi);
+}
+
+void Assembler::cmpne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
+ // 0010 0101 ..0. .... 100. .... ...1 ....
+ // size<23:22> | imm5<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5>
+ // | ne<4> = 1 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+
+ CompareVectors(pd, pg, zn, imm5, CMPNE_p_p_zi);
+}
+
+// SVEIntCompareUnsignedImm.
+
+void Assembler::cmphi(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7) {
+ // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
+ // 0010 0100 ..1. .... ..0. .... ...1 ....
+ // size<23:22> | imm7<20:14> | lt<13> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 1 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+
+ CompareVectors(pd, pg, zn, imm7, CMPHI_p_p_zi);
+}
+
+void Assembler::cmphs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7) {
+ // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
+ // 0010 0100 ..1. .... ..0. .... ...0 ....
+ // size<23:22> | imm7<20:14> | lt<13> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+
+ CompareVectors(pd, pg, zn, imm7, CMPHS_p_p_zi);
+}
+
+void Assembler::cmplo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7) {
+ // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
+ // 0010 0100 ..1. .... ..1. .... ...0 ....
+ // size<23:22> | imm7<20:14> | lt<13> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+
+ CompareVectors(pd, pg, zn, imm7, CMPLO_p_p_zi);
+}
+
+void Assembler::cmpls(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7) {
+ // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm>
+ // 0010 0100 ..1. .... ..1. .... ...1 ....
+ // size<23:22> | imm7<20:14> | lt<13> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 1 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+
+ CompareVectors(pd, pg, zn, imm7, CMPLS_p_p_zi);
+}
+
+// SVEIntCompareVectors.
+
+// This prototype maps to 2 instruction encodings:
+// CMPEQ_p_p_zw
+// CMPEQ_p_p_zz
+void Assembler::cmpeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+ SVEIntCompareVectorsOp op = CMPEQ_p_p_zz;
+ if (!AreSameLaneSize(zn, zm)) {
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ op = CMPEQ_p_p_zw;
+ }
+ CompareVectors(pd, pg, zn, zm, op);
+}
+
+// This prototype maps to 2 instruction encodings:
+// CMPGE_p_p_zw
+// CMPGE_p_p_zz
+void Assembler::cmpge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+ SVEIntCompareVectorsOp op = CMPGE_p_p_zz;
+ if (!AreSameLaneSize(zn, zm)) {
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ op = CMPGE_p_p_zw;
+ }
+ CompareVectors(pd, pg, zn, zm, op);
+}
+
+// This prototype maps to 2 instruction encodings:
+// CMPGT_p_p_zw
+// CMPGT_p_p_zz
+void Assembler::cmpgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+ SVEIntCompareVectorsOp op = CMPGT_p_p_zz;
+ if (!AreSameLaneSize(zn, zm)) {
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ op = CMPGT_p_p_zw;
+ }
+ CompareVectors(pd, pg, zn, zm, op);
+}
+
+// This prototype maps to 2 instruction encodings:
+// CMPHI_p_p_zw
+// CMPHI_p_p_zz
+void Assembler::cmphi(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+ SVEIntCompareVectorsOp op = CMPHI_p_p_zz;
+ if (!AreSameLaneSize(zn, zm)) {
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ op = CMPHI_p_p_zw;
+ }
+ CompareVectors(pd, pg, zn, zm, op);
+}
+
+// This prototype maps to 2 instruction encodings:
+// CMPHS_p_p_zw
+// CMPHS_p_p_zz
+void Assembler::cmphs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+ SVEIntCompareVectorsOp op = CMPHS_p_p_zz;
+ if (!AreSameLaneSize(zn, zm)) {
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ op = CMPHS_p_p_zw;
+ }
+ CompareVectors(pd, pg, zn, zm, op);
+}
+
+void Assembler::cmple(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+ if (AreSameLaneSize(zn, zm)) {
+ cmpge(pd, pg, zm, zn);
+ return;
+ }
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ VIXL_ASSERT(!zn.IsLaneSizeD());
+
+ CompareVectors(pd, pg, zn, zm, CMPLE_p_p_zw);
+}
+
+void Assembler::cmplo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+ if (AreSameLaneSize(zn, zm)) {
+ cmphi(pd, pg, zm, zn);
+ return;
+ }
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ VIXL_ASSERT(!zn.IsLaneSizeD());
+
+ CompareVectors(pd, pg, zn, zm, CMPLO_p_p_zw);
+}
+
+void Assembler::cmpls(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+ if (AreSameLaneSize(zn, zm)) {
+ cmphs(pd, pg, zm, zn);
+ return;
+ }
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ VIXL_ASSERT(!zn.IsLaneSizeD());
+
+ CompareVectors(pd, pg, zn, zm, CMPLS_p_p_zw);
+}
+
+void Assembler::cmplt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+ if (AreSameLaneSize(zn, zm)) {
+ cmpgt(pd, pg, zm, zn);
+ return;
+ }
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ VIXL_ASSERT(!zn.IsLaneSizeD());
+
+ CompareVectors(pd, pg, zn, zm, CMPLT_p_p_zw);
+}
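+
+// Note (summarizing the code above): when both sources have the same lane
+// size, CMPLE/CMPLO/CMPLS/CMPLT are emitted as CMPGE/CMPHI/CMPHS/CMPGT with
+// the source operands reversed; only the wide (Zm.D) comparisons have
+// encodings of their own. For example, cmplt(p0.VnS(), p1.Zeroing(),
+// z2.VnS(), z3.VnS()) emits the same instruction as cmpgt(p0.VnS(),
+// p1.Zeroing(), z3.VnS(), z2.VnS()).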
+
+// This prototype maps to 2 instruction encodings:
+// CMPNE_p_p_zw
+// CMPNE_p_p_zz
+void Assembler::cmpne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn));
+ SVEIntCompareVectorsOp op = CMPNE_p_p_zz;
+ if (!AreSameLaneSize(zn, zm)) {
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ op = CMPNE_p_p_zw;
+ }
+ CompareVectors(pd, pg, zn, zm, op);
+}
+
+// SVEIntMiscUnpredicated.
+
+void Assembler::fexpa(const ZRegister& zd, const ZRegister& zn) {
+ // FEXPA <Zd>.<T>, <Zn>.<T>
+ // 0000 0100 ..10 0000 1011 10.. .... ....
+ // size<23:22> | opc<20:16> = 00000 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FEXPA_z_z | SVESize(zd) | Rd(zd) | Rn(zn));
+}
+
+void Assembler::ftssel(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 1011 00.. .... ....
+ // size<23:22> | Zm<20:16> | op<10> = 0 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FTSSEL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::movprfx(const ZRegister& zd, const ZRegister& zn) {
+ // MOVPRFX <Zd>, <Zn>
+ // 0000 0100 0010 0000 1011 11.. .... ....
+ // opc<23:22> = 00 | opc2<20:16> = 00000 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(MOVPRFX_z_z | Rd(zd) | Rn(zn));
+}
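+
+// Typical pairing (a sketch, assuming an Assembler instance `assm`): MOVPRFX
+// acts as a constructive prefix for the destructive instruction that follows,
+// for example:
+//   assm.movprfx(z0.VnS(), z1.VnS());
+//   assm.fmla(z0.VnS(), p0.Merging(), z2.VnS(), z3.VnS());
+// which, for active lanes, behaves like a non-destructive z0 = z1 + z2 * z3.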
+
+// SVEIntMulAddPredicated.
+
+void Assembler::mad(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za) {
+ // MAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
+ // 0000 0100 ..0. .... 110. .... .... ....
+ // size<23:22> | Zm<20:16> | op<13> = 0 | Pg<12:10> | Za<9:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zdn, zm, za));
+
+ Emit(MAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rm(zm) | Rn(za));
+}
+
+void Assembler::mla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // MLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..0. .... 010. .... .... ....
+ // size<23:22> | Zm<20:16> | op<13> = 0 | Pg<12:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+ Emit(MLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::mls(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // MLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..0. .... 011. .... .... ....
+ // size<23:22> | Zm<20:16> | op<13> = 1 | Pg<12:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+ Emit(MLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::msb(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za) {
+ // MSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T>
+ // 0000 0100 ..0. .... 111. .... .... ....
+ // size<23:22> | Zm<20:16> | op<13> = 1 | Pg<12:10> | Za<9:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zdn, zm, za));
+
+ Emit(MSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rm(zm) | Rn(za));
+}
+
+// SVEIntMulAddUnpredicated.
+
+void Assembler::sdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4));
+ VIXL_ASSERT(AreSameLaneSize(zm, zn));
+
+ Emit(SDOT_z_zzz | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
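+
+// For example (illustrative, assuming an Assembler instance `assm`):
+//   assm.sdot(z0.VnS(), z1.VnB(), z2.VnB());
+// accumulates into each S lane of z0 the dot product of the four
+// corresponding signed B lanes of z1 and z2.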
+
+void Assembler::udot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4));
+ VIXL_ASSERT(AreSameLaneSize(zm, zn));
+
+ Emit(UDOT_z_zzz | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+// SVEIntReduction.
+
+void Assembler::andv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+
+ Emit(ANDV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::eorv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+
+ Emit(EORV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::movprfx(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T>
+ // 0000 0100 ..01 000. 001. .... .... ....
+ // size<23:22> | opc<18:17> = 00 | M<16> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing());
+ VIXL_ASSERT(!pg.HasLaneSize());
+
+ Instr m = pg.IsMerging() ? 0x00010000 : 0x00000000;
+ Emit(MOVPRFX_z_p_z | SVESize(zd) | m | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::orv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+
+ Emit(ORV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::saddv(const VRegister& dd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kDRegSizeInBytes);
+
+ Emit(SADDV_r_p_z | SVESize(zn) | Rd(dd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::smaxv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+
+ Emit(SMAXV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::sminv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+
+ Emit(SMINV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::uaddv(const VRegister& dd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(UADDV_r_p_z | SVESize(zn) | Rd(dd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::umaxv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+
+ Emit(UMAXV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::uminv(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+
+ Emit(UMINV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+// SVEIntUnaryArithmeticPredicated.
+
+void Assembler::abs(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 0110 101. .... .... ....
+ // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(ABS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::cls(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 1000 101. .... .... ....
+ // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(CLS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::clz(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 1001 101. .... .... ....
+ // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(CLZ_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::cnot(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 1011 101. .... .... ....
+ // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(CNOT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::cnt(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 1010 101. .... .... ....
+ // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(CNT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fabs(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 1100 101. .... .... ....
+ // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FABS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fneg(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 1101 101. .... .... ....
+ // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(FNEG_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::neg(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 0111 101. .... .... ....
+ // size<23:22> | opc<18:16> = 111 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(NEG_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::not_(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 1110 101. .... .... ....
+ // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(NOT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::sxtb(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 0000 101. .... .... ....
+ // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() > kBRegSizeInBytes);
+
+ Emit(SXTB_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::sxth(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 0010 101. .... .... ....
+ // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() > kHRegSizeInBytes);
+
+ Emit(SXTH_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::sxtw(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // SXTW <Zd>.D, <Pg>/M, <Zn>.D
+ // 0000 0100 ..01 0100 101. .... .... ....
+ // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() > kSRegSizeInBytes);
+
+ Emit(SXTW_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::uxtb(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 0001 101. .... .... ....
+ // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() > kBRegSizeInBytes);
+
+ Emit(UXTB_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::uxth(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0100 ..01 0011 101. .... .... ....
+ // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() > kHRegSizeInBytes);
+
+ Emit(UXTH_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::uxtw(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // UXTW <Zd>.D, <Pg>/M, <Zn>.D
+ // 0000 0100 ..01 0101 101. .... .... ....
+ // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() > kSRegSizeInBytes);
+
+ Emit(UXTW_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+// SVEIntWideImmPredicated.
+
+void Assembler::cpy(const ZRegister& zd,
+ const PRegister& pg,
+ int imm8,
+ int shift) {
+ // CPY <Zd>.<T>, <Pg>/<ZM>, #<imm>{, <shift>}
+ // 0000 0101 ..01 .... 0... .... .... ....
+ // size<23:22> | Pg<19:16> | M<14> | sh<13> | imm8<12:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing());
+
+ ResolveSVEImm8Shift(&imm8, &shift);
+
+ Instr sh = (shift > 0) ? (1 << 13) : 0;
+ Instr m = pg.IsMerging() ? (1 << 14) : 0;
+ Emit(CPY_z_p_i | m | sh | SVESize(zd) | Rd(zd) | Pg<19, 16>(pg) |
+ ImmField<12, 5>(imm8));
+}
+
+void Assembler::fcpy(const ZRegister& zd, const PRegisterM& pg, double imm) {
+ // FCPY <Zd>.<T>, <Pg>/M, #<const>
+ // 0000 0101 ..01 .... 110. .... .... ....
+ // size<23:22> | Pg<19:16> | imm8<12:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Instr imm_field = ImmUnsignedField<12, 5>(FP64ToImm8(imm));
+ Emit(FCPY_z_p_i | SVESize(zd) | Rd(zd) | Pg<19, 16>(pg) | imm_field);
+}
+
+// SVEIntAddSubtractImmUnpredicated.
+
+void Assembler::SVEIntAddSubtractImmUnpredicatedHelper(
+ SVEIntAddSubtractImm_UnpredicatedOp op,
+ const ZRegister& zd,
+ int imm8,
+ int shift) {
+ if (shift < 0) {
+ VIXL_ASSERT(shift == -1);
+ // Derive the shift amount from the immediate.
+ if (IsUint8(imm8)) {
+ shift = 0;
+ } else if (IsUint16(imm8) && ((imm8 % 256) == 0)) {
+ imm8 /= 256;
+ shift = 8;
+ }
+ }
+
+ VIXL_ASSERT(IsUint8(imm8));
+ VIXL_ASSERT((shift == 0) || (shift == 8));
+
+ Instr shift_bit = (shift > 0) ? (1 << 13) : 0;
+ Emit(op | SVESize(zd) | Rd(zd) | shift_bit | ImmUnsignedField<12, 5>(imm8));
+}
+
+void Assembler::add(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift) {
+ // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+ // 0010 0101 ..10 0000 11.. .... .... ....
+ // size<23:22> | opc<18:16> = 000 | sh<13> | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ SVEIntAddSubtractImmUnpredicatedHelper(ADD_z_zi, zd, imm8, shift);
+}
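+
+// Worked example of the shift derivation in the helper above (illustrative,
+// assuming the default shift argument of -1 declared in the header): an
+// immediate of 0x3f00 resolves to imm8 = 0x3f with shift = 8, so
+// add(z0.VnH(), z0.VnH(), 0x3f00) is encoded as ADD z0.h, z0.h, #0x3f, LSL #8.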
+
+void Assembler::dup(const ZRegister& zd, int imm8, int shift) {
+ // DUP <Zd>.<T>, #<imm>{, <shift>}
+ // 0010 0101 ..11 1000 11.. .... .... ....
+ // size<23:22> | opc<18:17> = 00 | sh<13> | imm8<12:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ ResolveSVEImm8Shift(&imm8, &shift);
+ VIXL_ASSERT((shift < 8) || !zd.IsLaneSizeB());
+
+ Instr shift_bit = (shift > 0) ? (1 << 13) : 0;
+ Emit(DUP_z_i | SVESize(zd) | Rd(zd) | shift_bit | ImmField<12, 5>(imm8));
+}
+
+void Assembler::fdup(const ZRegister& zd, double imm) {
+ // FDUP <Zd>.<T>, #<const>
+ // 0010 0101 ..11 1001 110. .... .... ....
+ // size<23:22> | opc<18:17> = 00 | o2<13> = 0 | imm8<12:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Instr encoded_imm = FP64ToImm8(imm) << 5;
+ Emit(FDUP_z_i | SVESize(zd) | encoded_imm | Rd(zd));
+}
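+
+// For example, fdup(z0.VnD(), 1.0) broadcasts 1.0 to every D lane; 1.0 maps to
+// the 8-bit floating-point immediate 0x70 (illustrative; any value accepted by
+// FP64ToImm8 can be used).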
+
+void Assembler::mul(const ZRegister& zd, const ZRegister& zn, int imm8) {
+ // MUL <Zdn>.<T>, <Zdn>.<T>, #<imm>
+ // 0010 0101 ..11 0000 110. .... .... ....
+ // size<23:22> | opc<18:16> = 000 | o2<13> = 0 | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(MUL_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8));
+}
+
+void Assembler::smax(const ZRegister& zd, const ZRegister& zn, int imm8) {
+ // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm>
+ // 0010 0101 ..10 1000 110. .... .... ....
+ // size<23:22> | opc<18:16> = 000 | o2<13> = 0 | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(SMAX_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8));
+}
+
+void Assembler::smin(const ZRegister& zd, const ZRegister& zn, int imm8) {
+ // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm>
+ // 0010 0101 ..10 1010 110. .... .... ....
+ // size<23:22> | opc<18:16> = 010 | o2<13> = 0 | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(SMIN_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8));
+}
+
+void Assembler::sqadd(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift) {
+ // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+ // 0010 0101 ..10 0100 11.. .... .... ....
+ // size<23:22> | opc<18:16> = 100 | sh<13> | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ SVEIntAddSubtractImmUnpredicatedHelper(SQADD_z_zi, zd, imm8, shift);
+}
+
+void Assembler::sqsub(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift) {
+ // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+ // 0010 0101 ..10 0110 11.. .... .... ....
+ // size<23:22> | opc<18:16> = 110 | sh<13> | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ SVEIntAddSubtractImmUnpredicatedHelper(SQSUB_z_zi, zd, imm8, shift);
+}
+
+void Assembler::sub(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift) {
+ // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+ // 0010 0101 ..10 0001 11.. .... .... ....
+ // size<23:22> | opc<18:16> = 001 | sh<13> | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ SVEIntAddSubtractImmUnpredicatedHelper(SUB_z_zi, zd, imm8, shift);
+}
+
+void Assembler::subr(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift) {
+ // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+ // 0010 0101 ..10 0011 11.. .... .... ....
+ // size<23:22> | opc<18:16> = 011 | sh<13> | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ SVEIntAddSubtractImmUnpredicatedHelper(SUBR_z_zi, zd, imm8, shift);
+}
+
+void Assembler::umax(const ZRegister& zd, const ZRegister& zn, int imm8) {
+ // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm>
+ // 0010 0101 ..10 1001 110. .... .... ....
+ // size<23:22> | opc<18:16> = 001 | o2<13> = 0 | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(UMAX_z_zi | SVESize(zd) | Rd(zd) | ImmUnsignedField<12, 5>(imm8));
+}
+
+void Assembler::umin(const ZRegister& zd, const ZRegister& zn, int imm8) {
+ // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm>
+ // 0010 0101 ..10 1011 110. .... .... ....
+ // size<23:22> | opc<18:16> = 011 | o2<13> = 0 | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(UMIN_z_zi | SVESize(zd) | Rd(zd) | ImmUnsignedField<12, 5>(imm8));
+}
+
+void Assembler::uqadd(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift) {
+ // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+ // 0010 0101 ..10 0101 11.. .... .... ....
+ // size<23:22> | opc<18:16> = 101 | sh<13> | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ SVEIntAddSubtractImmUnpredicatedHelper(UQADD_z_zi, zd, imm8, shift);
+}
+
+void Assembler::uqsub(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift) {
+ // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>}
+ // 0010 0101 ..10 0111 11.. .... .... ....
+ // size<23:22> | opc<18:16> = 111 | sh<13> | imm8<12:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ SVEIntAddSubtractImmUnpredicatedHelper(UQSUB_z_zi, zd, imm8, shift);
+}
+
+// SVEMemLoad.
+
+void Assembler::SVELdSt1Helper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ bool is_signed,
+ Instr op) {
+ VIXL_ASSERT(addr.IsContiguous());
+
+ Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr);
+ Instr dtype =
+ SVEDtype(msize_in_bytes_log2, zt.GetLaneSizeInBytesLog2(), is_signed);
+ Emit(op | mem_op | dtype | Rt(zt) | PgLow8(pg));
+}
+
+void Assembler::SVELdSt234Helper(int num_regs,
+ const ZRegister& zt1,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ Instr op) {
+ VIXL_ASSERT((num_regs >= 2) && (num_regs <= 4));
+
+ unsigned msize_in_bytes_log2 = zt1.GetLaneSizeInBytesLog2();
+ Instr num = (num_regs - 1) << 21;
+ Instr msz = msize_in_bytes_log2 << 23;
+ Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, num_regs, addr);
+ Emit(op | mem_op | msz | num | Rt(zt1) | PgLow8(pg));
+}
+
+void Assembler::SVELd1Helper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr,
+ bool is_signed) {
+ VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2);
+ if (is_signed) {
+ // Sign-extension is only possible when the vector elements are larger than
+ // the elements in memory.
+ VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2);
+ }
+
+ if (addr.IsScatterGather()) {
+ bool is_load = true;
+ bool is_ff = false;
+ SVEScatterGatherHelper(msize_in_bytes_log2,
+ zt,
+ pg,
+ addr,
+ is_load,
+ is_signed,
+ is_ff);
+ return;
+ }
+
+ Instr op = 0xffffffff;
+ if (addr.IsScalarPlusImmediate()) {
+ op = SVEContiguousLoad_ScalarPlusImmFixed;
+ } else if (addr.IsScalarPlusScalar()) {
+ // Rm must not be xzr.
+ VIXL_ASSERT(!addr.GetScalarOffset().IsZero());
+ op = SVEContiguousLoad_ScalarPlusScalarFixed;
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+ SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, is_signed, op);
+}
+
+void Assembler::SVELdff1Helper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr,
+ bool is_signed) {
+ VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2);
+ if (is_signed) {
+ // Sign-extension is only possible when the vector elements are larger than
+ // the elements in memory.
+ VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2);
+ }
+
+ if (addr.IsScatterGather()) {
+ bool is_load = true;
+ bool is_ff = true;
+ SVEScatterGatherHelper(msize_in_bytes_log2,
+ zt,
+ pg,
+ addr,
+ is_load,
+ is_signed,
+ is_ff);
+ return;
+ }
+
+ if (addr.IsPlainScalar()) {
+ // SVEMemOperand(x0) is treated as a scalar-plus-immediate form ([x0, #0]).
+ // In these instructions, we want to treat it as [x0, xzr].
+ SVEMemOperand addr_scalar_plus_scalar(addr.GetScalarBase(), xzr);
+ // Guard against infinite recursion.
+ VIXL_ASSERT(!addr_scalar_plus_scalar.IsPlainScalar());
+ SVELdff1Helper(msize_in_bytes_log2,
+ zt,
+ pg,
+ addr_scalar_plus_scalar,
+ is_signed);
+ return;
+ }
+
+ Instr op = 0xffffffff;
+ if (addr.IsScalarPlusScalar()) {
+ op = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed;
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+ SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, is_signed, op);
+}
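+
+// Illustrative consequence of the plain-scalar fallback above: a call such as
+// ldff1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x1)) is assembled as if it had
+// been written with an explicit xzr offset, i.e. ldff1b { z0.b }, p0/z,
+// [x1, xzr].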
+
+void Assembler::SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ bool is_load,
+ bool is_signed,
+ bool is_first_fault) {
+ VIXL_ASSERT(addr.IsScatterGather());
+ VIXL_ASSERT(zt.IsLaneSizeS() || zt.IsLaneSizeD());
+ VIXL_ASSERT(is_load || !is_first_fault);
+ VIXL_ASSERT(is_load || !is_signed);
+
+ Instr op = 0xffffffff;
+ if (addr.IsVectorPlusImmediate()) {
+ VIXL_ASSERT(AreSameLaneSize(zt, addr.GetVectorBase()));
+ if (is_load) {
+ if (zt.IsLaneSizeS()) {
+ op = SVE32BitGatherLoad_VectorPlusImmFixed;
+ } else {
+ op = SVE64BitGatherLoad_VectorPlusImmFixed;
+ }
+ } else {
+ if (zt.IsLaneSizeS()) {
+ op = SVE32BitScatterStore_VectorPlusImmFixed;
+ } else {
+ op = SVE64BitScatterStore_VectorPlusImmFixed;
+ }
+ }
+ } else {
+ VIXL_ASSERT(addr.IsScalarPlusVector());
+ VIXL_ASSERT(AreSameLaneSize(zt, addr.GetVectorOffset()));
+ SVEOffsetModifier mod = addr.GetOffsetModifier();
+ if (zt.IsLaneSizeS()) {
+ VIXL_ASSERT((mod == SVE_UXTW) || (mod == SVE_SXTW));
+ unsigned shift_amount = addr.GetShiftAmount();
+ if (shift_amount == 0) {
+ if (is_load) {
+ op = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed;
+ } else {
+ op = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed;
+ }
+ } else if (shift_amount == 1) {
+ VIXL_ASSERT(msize_in_bytes_log2 == kHRegSizeInBytesLog2);
+ if (is_load) {
+ op = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed;
+ } else {
+ op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed;
+ }
+ } else {
+ VIXL_ASSERT(shift_amount == 2);
+ VIXL_ASSERT(msize_in_bytes_log2 == kSRegSizeInBytesLog2);
+ if (is_load) {
+ op = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed;
+ } else {
+ op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed;
+ }
+ }
+ } else if (zt.IsLaneSizeD()) {
+ switch (mod) {
+ case NO_SVE_OFFSET_MODIFIER:
+ if (is_load) {
+ op = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed;
+ } else {
+ op = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed;
+ }
+ break;
+ case SVE_LSL:
+ if (is_load) {
+ op = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed;
+ } else {
+ op = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed;
+ }
+ break;
+ case SVE_UXTW:
+ case SVE_SXTW: {
+ unsigned shift_amount = addr.GetShiftAmount();
+ if (shift_amount == 0) {
+ if (is_load) {
+ op =
+ SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed;
+ } else {
+ op =
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed;
+ }
+ } else {
+ VIXL_ASSERT(shift_amount == msize_in_bytes_log2);
+ if (is_load) {
+ op = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed;
+ } else {
+ op =
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed;
+ }
+ }
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ }
+
+ Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr, is_load);
+ Instr msz = ImmUnsignedField<24, 23>(msize_in_bytes_log2);
+ Instr u = (!is_load || is_signed) ? 0 : (1 << 14);
+ Instr ff = is_first_fault ? (1 << 13) : 0;
+ Emit(op | mem_op | msz | u | ff | Rt(zt) | PgLow8(pg));
+}
+
+void Assembler::SVELd234Helper(int num_regs,
+ const ZRegister& zt1,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ if (addr.IsScalarPlusScalar()) {
+ // Rm must not be xzr.
+ VIXL_ASSERT(!addr.GetScalarOffset().IsZero());
+ }
+
+ Instr op;
+ if (addr.IsScalarPlusImmediate()) {
+ op = SVELoadMultipleStructures_ScalarPlusImmFixed;
+ } else if (addr.IsScalarPlusScalar()) {
+ op = SVELoadMultipleStructures_ScalarPlusScalarFixed;
+ } else {
+ // These instructions don't support any other addressing modes.
+ VIXL_ABORT();
+ }
+ SVELdSt234Helper(num_regs, zt1, pg, addr, op);
+}
+
+// SVEMemContiguousLoad.
+
+#define VIXL_DEFINE_LD1(MSZ, LANE_SIZE) \
+ void Assembler::ld1##MSZ(const ZRegister& zt, \
+ const PRegisterZ& pg, \
+ const SVEMemOperand& addr) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ SVELd1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, false); \
+ }
+#define VIXL_DEFINE_LD2(MSZ, LANE_SIZE) \
+ void Assembler::ld2##MSZ(const ZRegister& zt1, \
+ const ZRegister& zt2, \
+ const PRegisterZ& pg, \
+ const SVEMemOperand& addr) { \
+ USE(zt2); \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ VIXL_ASSERT(AreConsecutive(zt1, zt2)); \
+ VIXL_ASSERT(AreSameFormat(zt1, zt2)); \
+ VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \
+ SVELd234Helper(2, zt1, pg, addr); \
+ }
+#define VIXL_DEFINE_LD3(MSZ, LANE_SIZE) \
+ void Assembler::ld3##MSZ(const ZRegister& zt1, \
+ const ZRegister& zt2, \
+ const ZRegister& zt3, \
+ const PRegisterZ& pg, \
+ const SVEMemOperand& addr) { \
+ USE(zt2, zt3); \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3)); \
+ VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3)); \
+ VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \
+ SVELd234Helper(3, zt1, pg, addr); \
+ }
+#define VIXL_DEFINE_LD4(MSZ, LANE_SIZE) \
+ void Assembler::ld4##MSZ(const ZRegister& zt1, \
+ const ZRegister& zt2, \
+ const ZRegister& zt3, \
+ const ZRegister& zt4, \
+ const PRegisterZ& pg, \
+ const SVEMemOperand& addr) { \
+ USE(zt2, zt3, zt4); \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3, zt4)); \
+ VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3, zt4)); \
+ VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \
+ SVELd234Helper(4, zt1, pg, addr); \
+ }
+
+VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD1)
+VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD2)
+VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD3)
+VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD4)
+
+#define VIXL_DEFINE_LD1S(MSZ, LANE_SIZE) \
+ void Assembler::ld1s##MSZ(const ZRegister& zt, \
+ const PRegisterZ& pg, \
+ const SVEMemOperand& addr) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ SVELd1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, true); \
+ }
+VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(VIXL_DEFINE_LD1S)
+
+// SVEMem32BitGatherAndUnsizedContiguous.
+
+void Assembler::SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr,
+ bool is_signed) {
+ VIXL_ASSERT(addr.IsScalarPlusImmediate());
+ VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2);
+ if (is_signed) {
+ // Sign-extension is only possible when the vector elements are larger than
+ // the elements in memory.
+ VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2);
+ }
+
+ int64_t imm = addr.GetImmediateOffset();
+ int divisor = 1 << msize_in_bytes_log2;
+ VIXL_ASSERT(imm % divisor == 0);
+ Instr dtype = SVEDtypeSplit(msize_in_bytes_log2,
+ zt.GetLaneSizeInBytesLog2(),
+ is_signed);
+
+ Emit(SVELoadAndBroadcastElementFixed | dtype | RnSP(addr.GetScalarBase()) |
+ ImmUnsignedField<21, 16>(imm / divisor) | Rt(zt) | PgLow8(pg));
+}
+
+// This prototype maps to 4 instruction encodings:
+// LD1RB_z_p_bi_u16
+// LD1RB_z_p_bi_u32
+// LD1RB_z_p_bi_u64
+// LD1RB_z_p_bi_u8
+void Assembler::ld1rb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ SVELd1BroadcastHelper(kBRegSizeInBytesLog2, zt, pg, addr, false);
+}
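+
+// For example (illustrative usage): ld1rb(z0.VnS(), p0.Zeroing(),
+// SVEMemOperand(x0, 7)) selects the LD1RB_z_p_bi_u32 encoding from the
+// destination lane size, and the byte offset 7 is encoded directly because the
+// divisor for B-sized memory elements is 1.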
+
+// This prototype maps to 3 instruction encodings:
+// LD1RH_z_p_bi_u16
+// LD1RH_z_p_bi_u32
+// LD1RH_z_p_bi_u64
+void Assembler::ld1rh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ SVELd1BroadcastHelper(kHRegSizeInBytesLog2, zt, pg, addr, false);
+}
+
+// This prototype maps to 2 instruction encodings:
+// LD1RW_z_p_bi_u32
+// LD1RW_z_p_bi_u64
+void Assembler::ld1rw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ SVELd1BroadcastHelper(kSRegSizeInBytesLog2, zt, pg, addr, false);
+}
+
+void Assembler::ld1rd(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ SVELd1BroadcastHelper(kDRegSizeInBytesLog2, zt, pg, addr, false);
+}
+
+// This prototype maps to 3 instruction encodings:
+// LD1RSB_z_p_bi_s16
+// LD1RSB_z_p_bi_s32
+// LD1RSB_z_p_bi_s64
+void Assembler::ld1rsb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ SVELd1BroadcastHelper(kBRegSizeInBytesLog2, zt, pg, addr, true);
+}
+
+// This prototype maps to 2 instruction encodings:
+// LD1RSH_z_p_bi_s32
+// LD1RSH_z_p_bi_s64
+void Assembler::ld1rsh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ SVELd1BroadcastHelper(kHRegSizeInBytesLog2, zt, pg, addr, true);
+}
+
+void Assembler::ld1rsw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ SVELd1BroadcastHelper(kWRegSizeInBytesLog2, zt, pg, addr, true);
+}
+
+void Assembler::ldr(const CPURegister& rt, const SVEMemOperand& addr) {
+ // LDR <Pt/Zt>, [<Xn|SP>{, #<imm>, MUL VL}]
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister());
+ VIXL_ASSERT(addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() &&
+ (addr.GetOffsetModifier() == SVE_MUL_VL)));
+ int64_t imm9 = addr.GetImmediateOffset();
+ VIXL_ASSERT(IsInt9(imm9));
+ Instr imm9l = ExtractUnsignedBitfield32(2, 0, imm9) << 10;
+ Instr imm9h = ExtractUnsignedBitfield32(8, 3, imm9) << 16;
+
+ Instr op = LDR_z_bi;
+ if (rt.IsPRegister()) {
+ op = LDR_p_bi;
+ }
+ Emit(op | Rt(rt) | RnSP(addr.GetScalarBase()) | imm9h | imm9l);
+}
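+
+// Worked example of the immediate split above: an offset of #10, MUL VL gives
+// imm9 = 10 = 0b000001010, so imm9l (bits 2:0) is 2 and imm9h (bits 8:3) is 1
+// before each field is shifted into place.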
+
+// SVEMem64BitGather.
+
+// This prototype maps to multiple instruction encodings, including:
+// LDFF1B_z_p_bz_d_64_unscaled
+// LDFF1B_z_p_bz_d_x32_unscaled
+void Assembler::ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
+ // 1100 0100 010. .... 111. .... .... ....
+ // msz<24:23> = 00 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5>
+ // | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1B_z_p_bz_d_64_unscaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm));
+}
+
+// This prototype maps to 2 instruction encodings:
+// LDFF1B_z_p_ai_d
+// LDFF1B_z_p_ai_s
+void Assembler::ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+ // 1100 0100 001. .... 111. .... .... ....
+ // msz<24:23> = 00 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> |
+ // Zn<9:5> | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1B_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5));
+}
+
+// This prototype maps to 4 instruction encodings:
+// LDFF1D_z_p_bz_d_64_scaled
+// LDFF1D_z_p_bz_d_64_unscaled
+// LDFF1D_z_p_bz_d_x32_scaled
+// LDFF1D_z_p_bz_d_x32_unscaled
+void Assembler::ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3]
+ // 1100 0101 111. .... 111. .... .... ....
+ // msz<24:23> = 11 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5>
+ // | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1D_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm));
+}
+
+void Assembler::ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+ // 1100 0101 101. .... 111. .... .... ....
+ // msz<24:23> = 11 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> |
+ // Zn<9:5> | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1D_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5));
+}
+
+// This prototype maps to multiple instruction encodings, including:
+// LDFF1H_z_p_bz_d_64_scaled
+// LDFF1H_z_p_bz_d_64_unscaled
+// LDFF1H_z_p_bz_d_x32_scaled
+// LDFF1H_z_p_bz_d_x32_unscaled
+void Assembler::ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]
+ // 1100 0100 111. .... 111. .... .... ....
+ // msz<24:23> = 01 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5>
+ // | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1H_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm));
+}
+
+// This prototype maps to 2 instruction encodings:
+// LDFF1H_z_p_ai_d
+// LDFF1H_z_p_ai_s
+void Assembler::ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+ // 1100 0100 101. .... 111. .... .... ....
+ // msz<24:23> = 01 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> |
+ // Zn<9:5> | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1H_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5));
+}
+
+// This prototype maps to multiple instruction encodings, including:
+// LDFF1SB_z_p_bz_d_64_unscaled
+// LDFF1SB_z_p_bz_d_x32_unscaled
+void Assembler::ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D]
+ // 1100 0100 010. .... 101. .... .... ....
+ // msz<24:23> = 00 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5>
+ // | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1SB_z_p_bz_d_64_unscaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm));
+}
+
+// This prototype maps to 2 instruction encodings:
+// LDFF1SB_z_p_ai_d
+// LDFF1SB_z_p_ai_s
+void Assembler::ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+ // 1100 0100 001. .... 101. .... .... ....
+ // msz<24:23> = 00 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> |
+ // Zn<9:5> | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1SB_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) |
+ ImmField<20, 16>(imm5));
+}
+
+// This prototype maps to multiple instruction encodings, including:
+// LDFF1SH_z_p_bz_d_64_scaled
+// LDFF1SH_z_p_bz_d_64_unscaled
+// LDFF1SH_z_p_bz_d_x32_scaled
+// LDFF1SH_z_p_bz_d_x32_unscaled
+void Assembler::ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1]
+ // 1100 0100 111. .... 101. .... .... ....
+ // msz<24:23> = 01 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5>
+ // | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1SH_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm));
+}
+
+// This prototype maps to 2 instruction encodings:
+// LDFF1SH_z_p_ai_d
+// LDFF1SH_z_p_ai_s
+void Assembler::ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+ // 1100 0100 101. .... 101. .... .... ....
+ // msz<24:23> = 01 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> |
+ // Zn<9:5> | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1SH_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) |
+ ImmField<20, 16>(imm5));
+}
+
+// This prototype maps to 4 instruction encodings:
+// LDFF1SW_z_p_bz_d_64_scaled
+// LDFF1SW_z_p_bz_d_64_unscaled
+// LDFF1SW_z_p_bz_d_x32_scaled
+// LDFF1SW_z_p_bz_d_x32_unscaled
+void Assembler::ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]
+ // 1100 0101 011. .... 101. .... .... ....
+ // msz<24:23> = 10 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5>
+ // | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1SW_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm));
+}
+
+void Assembler::ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+ // 1100 0101 001. .... 101. .... .... ....
+ // msz<24:23> = 10 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> |
+ // Zn<9:5> | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1SW_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) |
+ ImmField<20, 16>(imm5));
+}
+
+// This prototype maps to multiple instruction encodings, including:
+// LDFF1W_z_p_bz_d_64_scaled
+// LDFF1W_z_p_bz_d_64_unscaled
+// LDFF1W_z_p_bz_d_x32_scaled
+// LDFF1W_z_p_bz_d_x32_unscaled
+void Assembler::ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2]
+ // 1100 0101 011. .... 111. .... .... ....
+ // msz<24:23> = 10 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5>
+ // | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1W_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm));
+}
+
+// This prototype maps to 2 instruction encodings:
+// LDFF1W_z_p_ai_d
+// LDFF1W_z_p_ai_s
+void Assembler::ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}]
+ // 1100 0101 001. .... 111. .... .... ....
+ // msz<24:23> = 10 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> |
+ // Zn<9:5> | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LDFF1W_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5));
+}
+
+void Assembler::SVEGatherPrefetchVectorPlusImmediateHelper(
+ PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ int prefetch_size) {
+ VIXL_ASSERT(addr.IsVectorPlusImmediate());
+ ZRegister zn = addr.GetVectorBase();
+ VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD());
+
+ Instr op = 0xffffffff;
+ switch (prefetch_size) {
+ case kBRegSize:
+ op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFB_i_p_ai_s)
+ : static_cast<Instr>(PRFB_i_p_ai_d);
+ break;
+ case kHRegSize:
+ op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFH_i_p_ai_s)
+ : static_cast<Instr>(PRFH_i_p_ai_d);
+ break;
+ case kSRegSize:
+ op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFW_i_p_ai_s)
+ : static_cast<Instr>(PRFW_i_p_ai_d);
+ break;
+ case kDRegSize:
+ op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFD_i_p_ai_s)
+ : static_cast<Instr>(PRFD_i_p_ai_d);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ int64_t imm5 = addr.GetImmediateOffset();
+ Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | Rn(zn) |
+ ImmUnsignedField<20, 16>(imm5));
+}
+
+void Assembler::SVEGatherPrefetchScalarPlusImmediateHelper(
+ PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ int prefetch_size) {
+ VIXL_ASSERT(addr.IsScalarPlusImmediate());
+ int64_t imm6 = addr.GetImmediateOffset();
+
+ Instr op = 0xffffffff;
+ switch (prefetch_size) {
+ case kBRegSize:
+ op = PRFB_i_p_bi_s;
+ break;
+ case kHRegSize:
+ op = PRFH_i_p_bi_s;
+ break;
+ case kSRegSize:
+ op = PRFW_i_p_bi_s;
+ break;
+ case kDRegSize:
+ op = PRFD_i_p_bi_s;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) |
+ RnSP(addr.GetScalarBase()) | ImmField<21, 16>(imm6));
+}
+
+void Assembler::SVEContiguousPrefetchScalarPlusScalarHelper(
+ PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ int prefetch_size) {
+ VIXL_ASSERT(addr.IsScalarPlusScalar());
+ Instr op = 0xffffffff;
+
+ switch (prefetch_size) {
+ case kBRegSize:
+ VIXL_ASSERT(addr.GetOffsetModifier() == NO_SVE_OFFSET_MODIFIER);
+ op = PRFB_i_p_br_s;
+ break;
+ case kHRegSize:
+ VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL);
+ VIXL_ASSERT(addr.GetShiftAmount() == kHRegSizeInBytesLog2);
+ op = PRFH_i_p_br_s;
+ break;
+ case kSRegSize:
+ VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL);
+ VIXL_ASSERT(addr.GetShiftAmount() == kSRegSizeInBytesLog2);
+ op = PRFW_i_p_br_s;
+ break;
+ case kDRegSize:
+ VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL);
+ VIXL_ASSERT(addr.GetShiftAmount() == kDRegSizeInBytesLog2);
+ op = PRFD_i_p_br_s;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ VIXL_ASSERT(!addr.GetScalarOffset().IsZero());
+ Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) |
+ RnSP(addr.GetScalarBase()) | Rm(addr.GetScalarOffset()));
+}
+
+void Assembler::SVEContiguousPrefetchScalarPlusVectorHelper(
+ PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ int prefetch_size) {
+ VIXL_ASSERT(addr.IsScalarPlusVector());
+ ZRegister zm = addr.GetVectorOffset();
+ SVEOffsetModifier mod = addr.GetOffsetModifier();
+
+ // All prefetch scalar-plus-vector addressing modes use a shift corresponding
+ // to the element size.
+ switch (prefetch_size) {
+ case kBRegSize:
+ VIXL_ASSERT(addr.GetShiftAmount() == kBRegSizeInBytesLog2);
+ break;
+ case kHRegSize:
+ VIXL_ASSERT(addr.GetShiftAmount() == kHRegSizeInBytesLog2);
+ break;
+ case kSRegSize:
+ VIXL_ASSERT(addr.GetShiftAmount() == kSRegSizeInBytesLog2);
+ break;
+ case kDRegSize:
+ VIXL_ASSERT(addr.GetShiftAmount() == kDRegSizeInBytesLog2);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ Instr sx = 0;
+ Instr op = 0xffffffff;
+ if ((mod == NO_SVE_OFFSET_MODIFIER) || (mod == SVE_LSL)) {
+ VIXL_ASSERT(zm.IsLaneSizeD());
+
+ switch (prefetch_size) {
+ case kBRegSize:
+ VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
+ op = PRFB_i_p_bz_d_64_scaled;
+ break;
+ case kHRegSize:
+ VIXL_ASSERT(mod == SVE_LSL);
+ op = PRFH_i_p_bz_d_64_scaled;
+ break;
+ case kSRegSize:
+ VIXL_ASSERT(mod == SVE_LSL);
+ op = PRFW_i_p_bz_d_64_scaled;
+ break;
+ case kDRegSize:
+ VIXL_ASSERT(mod == SVE_LSL);
+ op = PRFD_i_p_bz_d_64_scaled;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ } else {
+ VIXL_ASSERT((mod == SVE_SXTW) || (mod == SVE_UXTW));
+ VIXL_ASSERT(zm.IsLaneSizeS() || zm.IsLaneSizeD());
+
+ switch (prefetch_size) {
+ case kBRegSize:
+ op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFB_i_p_bz_s_x32_scaled)
+ : static_cast<Instr>(PRFB_i_p_bz_d_x32_scaled);
+ break;
+ case kHRegSize:
+ op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFH_i_p_bz_s_x32_scaled)
+ : static_cast<Instr>(PRFH_i_p_bz_d_x32_scaled);
+ break;
+ case kSRegSize:
+ op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFW_i_p_bz_s_x32_scaled)
+ : static_cast<Instr>(PRFW_i_p_bz_d_x32_scaled);
+ break;
+ case kDRegSize:
+ op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFD_i_p_bz_s_x32_scaled)
+ : static_cast<Instr>(PRFD_i_p_bz_d_x32_scaled);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ if (mod == SVE_SXTW) {
+ sx = 1 << 22;
+ }
+ }
+
+ Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | sx |
+ RnSP(addr.GetScalarBase()) | Rm(zm));
+}
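+
+// Illustrative usage (assuming the usual operand helpers): prfh(PLDL1KEEP, p0,
+// SVEMemOperand(x0, z1.VnD(), SVE_LSL, 1)) takes the 64-bit scaled path above,
+// with the LSL amount matching the halfword element size.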
+
+void Assembler::SVEPrefetchHelper(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ int prefetch_size) {
+ if (addr.IsVectorPlusImmediate()) {
+ // For example:
+ // [z0.s, #0]
+ SVEGatherPrefetchVectorPlusImmediateHelper(prfop, pg, addr, prefetch_size);
+
+ } else if (addr.IsScalarPlusImmediate()) {
+ // For example:
+ // [x0, #42, mul vl]
+ SVEGatherPrefetchScalarPlusImmediateHelper(prfop, pg, addr, prefetch_size);
+
+ } else if (addr.IsScalarPlusVector()) {
+ // For example:
+ // [x0, z0.s, sxtw]
+ SVEContiguousPrefetchScalarPlusVectorHelper(prfop, pg, addr, prefetch_size);
+
+ } else if (addr.IsScalarPlusScalar()) {
+ // For example:
+ // [x0, x1]
+ SVEContiguousPrefetchScalarPlusScalarHelper(prfop, pg, addr, prefetch_size);
+
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+void Assembler::prfb(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ SVEPrefetchHelper(prfop, pg, addr, kBRegSize);
+}
+
+void Assembler::prfd(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ SVEPrefetchHelper(prfop, pg, addr, kDRegSize);
+}
+
+void Assembler::prfh(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ SVEPrefetchHelper(prfop, pg, addr, kHRegSize);
+}
+
+void Assembler::prfw(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ SVEPrefetchHelper(prfop, pg, addr, kSRegSize);
+}
+
+void Assembler::SVELd1St1ScaImmHelper(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ Instr regoffset_op,
+ Instr immoffset_op,
+ int imm_divisor) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(addr.IsScalarPlusScalar() || addr.IsScalarPlusImmediate());
+
+ Instr op;
+ if (addr.IsScalarPlusScalar()) {
+ op = regoffset_op | Rm(addr.GetScalarOffset());
+ } else {
+ int64_t imm = addr.GetImmediateOffset();
+ VIXL_ASSERT(((imm % imm_divisor) == 0) && IsInt4(imm / imm_divisor));
+ op = immoffset_op | ImmField<19, 16>(imm / imm_divisor);
+ }
+ Emit(op | Rt(zt) | PgLow8(pg) | RnSP(addr.GetScalarBase()));
+}
+
+void Assembler::ld1rqb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(0));
+ VIXL_ASSERT(zt.IsLaneSizeB());
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LD1RQB_z_p_br_contiguous,
+ LD1RQB_z_p_bi_u8,
+ 16);
+}
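+
+// Because the helper divides the immediate by 16 and encodes it in a signed
+// 4-bit field, the scalar-plus-immediate forms of ld1rq<t> accept byte offsets
+// that are multiples of 16 in the range [-128, 112], for example
+// (illustrative) ld1rqb(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, 32)).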
+
+void Assembler::ld1rqd(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(3));
+ VIXL_ASSERT(zt.IsLaneSizeD());
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LD1RQD_z_p_br_contiguous,
+ LD1RQD_z_p_bi_u64,
+ 16);
+}
+
+void Assembler::ld1rqh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(1));
+ VIXL_ASSERT(zt.IsLaneSizeH());
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LD1RQH_z_p_br_contiguous,
+ LD1RQH_z_p_bi_u16,
+ 16);
+}
+
+void Assembler::ld1rqw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(2));
+ VIXL_ASSERT(zt.IsLaneSizeS());
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LD1RQW_z_p_br_contiguous,
+ LD1RQW_z_p_bi_u32,
+ 16);
+}
+
+#define VIXL_DEFINE_LDFF1(MSZ, LANE_SIZE) \
+ void Assembler::ldff1##MSZ(const ZRegister& zt, \
+ const PRegisterZ& pg, \
+ const SVEMemOperand& addr) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ SVELdff1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, false); \
+ }
+VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LDFF1)
+
+#define VIXL_DEFINE_LDFF1S(MSZ, LANE_SIZE) \
+ void Assembler::ldff1s##MSZ(const ZRegister& zt, \
+ const PRegisterZ& pg, \
+ const SVEMemOperand& addr) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ SVELdff1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, true); \
+ }
+VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(VIXL_DEFINE_LDFF1S)
+
+void Assembler::ldnf1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(addr.IsPlainRegister() ||
+ (addr.IsScalarPlusImmediate() &&
+ (addr.GetOffsetModifier() == SVE_MUL_VL)));
+
+ SVELdSt1Helper(0,
+ zt,
+ pg,
+ addr,
+ /* is_signed = */ false,
+ SVEContiguousNonFaultLoad_ScalarPlusImmFixed);
+}
+
+void Assembler::ldnf1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(addr.IsPlainRegister() ||
+ (addr.IsScalarPlusImmediate() &&
+ (addr.GetOffsetModifier() == SVE_MUL_VL)));
+
+ SVELdSt1Helper(3,
+ zt,
+ pg,
+ addr,
+ /* is_signed = */ false,
+ SVEContiguousNonFaultLoad_ScalarPlusImmFixed);
+}
+
+void Assembler::ldnf1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(addr.IsPlainRegister() ||
+ (addr.IsScalarPlusImmediate() &&
+ (addr.GetOffsetModifier() == SVE_MUL_VL)));
+
+ SVELdSt1Helper(1,
+ zt,
+ pg,
+ addr,
+ /* is_signed = */ false,
+ SVEContiguousNonFaultLoad_ScalarPlusImmFixed);
+}
+
+void Assembler::ldnf1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(addr.IsPlainRegister() ||
+ (addr.IsScalarPlusImmediate() &&
+ (addr.GetOffsetModifier() == SVE_MUL_VL)));
+
+ SVELdSt1Helper(0,
+ zt,
+ pg,
+ addr,
+ /* is_signed = */ true,
+ SVEContiguousNonFaultLoad_ScalarPlusImmFixed);
+}
+
+void Assembler::ldnf1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(addr.IsPlainRegister() ||
+ (addr.IsScalarPlusImmediate() &&
+ (addr.GetOffsetModifier() == SVE_MUL_VL)));
+
+ SVELdSt1Helper(1,
+ zt,
+ pg,
+ addr,
+ /* is_signed = */ true,
+ SVEContiguousNonFaultLoad_ScalarPlusImmFixed);
+}
+
+void Assembler::ldnf1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(addr.IsPlainRegister() ||
+ (addr.IsScalarPlusImmediate() &&
+ (addr.GetOffsetModifier() == SVE_MUL_VL)));
+
+ SVELdSt1Helper(2,
+ zt,
+ pg,
+ addr,
+ /* is_signed = */ true,
+ SVEContiguousNonFaultLoad_ScalarPlusImmFixed);
+}
+
+void Assembler::ldnf1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(addr.IsPlainRegister() ||
+ (addr.IsScalarPlusImmediate() &&
+ (addr.GetOffsetModifier() == SVE_MUL_VL)));
+
+ SVELdSt1Helper(2,
+ zt,
+ pg,
+ addr,
+ /* is_signed = */ false,
+ SVEContiguousNonFaultLoad_ScalarPlusImmFixed);
+}
+
+void Assembler::ldnt1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0)));
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LDNT1B_z_p_br_contiguous,
+ LDNT1B_z_p_bi_contiguous);
+}
+
+void Assembler::ldnt1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3)));
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LDNT1D_z_p_br_contiguous,
+ LDNT1D_z_p_bi_contiguous);
+}
+
+void Assembler::ldnt1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1)));
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LDNT1H_z_p_br_contiguous,
+ LDNT1H_z_p_bi_contiguous);
+}
+
+void Assembler::ldnt1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2)));
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LDNT1W_z_p_br_contiguous,
+ LDNT1W_z_p_bi_contiguous);
+}
+
+Instr Assembler::SVEMemOperandHelper(unsigned msize_in_bytes_log2,
+ int num_regs,
+ const SVEMemOperand& addr,
+ bool is_load) {
+ VIXL_ASSERT((num_regs >= 1) && (num_regs <= 4));
+
+  Instr op = 0xffffffff;
+ if (addr.IsScalarPlusImmediate()) {
+ VIXL_ASSERT((addr.GetImmediateOffset() == 0) || addr.IsMulVl());
+ int64_t imm = addr.GetImmediateOffset();
+ VIXL_ASSERT((imm % num_regs) == 0);
+ op = RnSP(addr.GetScalarBase()) | ImmField<19, 16>(imm / num_regs);
+
+ } else if (addr.IsScalarPlusScalar()) {
+ VIXL_ASSERT(addr.GetScalarOffset().IsZero() ||
+ addr.IsEquivalentToLSL(msize_in_bytes_log2));
+ op = RnSP(addr.GetScalarBase()) | Rm(addr.GetScalarOffset());
+
+ } else if (addr.IsVectorPlusImmediate()) {
+ ZRegister zn = addr.GetVectorBase();
+ uint64_t imm = addr.GetImmediateOffset();
+ VIXL_ASSERT(num_regs == 1);
+ VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD());
+ VIXL_ASSERT(IsMultiple(imm, (1 << msize_in_bytes_log2)));
+ op = Rn(zn) | ImmUnsignedField<20, 16>(imm >> msize_in_bytes_log2);
+
+ } else if (addr.IsScalarPlusVector()) {
+ // We have to support several different addressing modes. Some instructions
+ // support a subset of these, but the SVEMemOperand encoding is consistent.
+ Register xn = addr.GetScalarBase();
+ ZRegister zm = addr.GetVectorOffset();
+ SVEOffsetModifier mod = addr.GetOffsetModifier();
+ Instr modifier_bit = 1 << (is_load ? 22 : 14);
+ Instr xs = (mod == SVE_SXTW) ? modifier_bit : 0;
+ VIXL_ASSERT(num_regs == 1);
+
+ if (mod == SVE_LSL) {
+ // 64-bit scaled offset: [<Xn|SP>, <Zm>.D, LSL #<shift>]
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ VIXL_ASSERT(addr.GetShiftAmount() == msize_in_bytes_log2);
+ } else if (mod == NO_SVE_OFFSET_MODIFIER) {
+ // 64-bit unscaled offset: [<Xn|SP>, <Zm>.D]
+ VIXL_ASSERT(zm.IsLaneSizeD());
+ VIXL_ASSERT(addr.GetShiftAmount() == 0);
+ } else {
+ // 32-bit scaled offset: [<Xn|SP>, <Zm>.S, <mod> #<shift>]
+ // 32-bit unscaled offset: [<Xn|SP>, <Zm>.S, <mod>]
+ // 32-bit unpacked scaled offset: [<Xn|SP>, <Zm>.D, <mod> #<shift>]
+ // 32-bit unpacked unscaled offset: [<Xn|SP>, <Zm>.D, <mod>]
+ VIXL_ASSERT(zm.IsLaneSizeS() || zm.IsLaneSizeD());
+ VIXL_ASSERT((mod == SVE_SXTW) || (mod == SVE_UXTW));
+ VIXL_ASSERT((addr.GetShiftAmount() == 0) ||
+ (addr.GetShiftAmount() == msize_in_bytes_log2));
+ }
+
+ // The form itself is encoded in the instruction opcode.
+ op = RnSP(xn) | Rm(zm) | xs;
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+
+ return op;
+}
+
+// SVEMemStore.
+
+void Assembler::SVESt1Helper(unsigned msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ if (addr.IsScalarPlusScalar()) {
+ // Rm must not be xzr.
+ VIXL_ASSERT(!addr.GetScalarOffset().IsZero());
+ }
+
+ if (addr.IsScatterGather()) {
+ bool is_load = false;
+ bool is_signed = false;
+ bool is_ff = false;
+ SVEScatterGatherHelper(msize_in_bytes_log2,
+ zt,
+ pg,
+ addr,
+ is_load,
+ is_signed,
+ is_ff);
+ return;
+ }
+
+ Instr op;
+ if (addr.IsScalarPlusImmediate()) {
+ op = SVEContiguousStore_ScalarPlusImmFixed;
+ } else if (addr.IsScalarPlusScalar()) {
+ op = SVEContiguousStore_ScalarPlusScalarFixed;
+ } else {
+ VIXL_UNIMPLEMENTED();
+ op = 0xffffffff;
+ }
+ SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, false, op);
+}
+
+void Assembler::SVESt234Helper(int num_regs,
+ const ZRegister& zt1,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ if (addr.IsScalarPlusScalar()) {
+ // Rm must not be xzr.
+ VIXL_ASSERT(!addr.GetScalarOffset().IsZero());
+ }
+
+ Instr op;
+ if (addr.IsScalarPlusImmediate()) {
+ op = SVEStoreMultipleStructures_ScalarPlusImmFixed;
+ } else if (addr.IsScalarPlusScalar()) {
+ op = SVEStoreMultipleStructures_ScalarPlusScalarFixed;
+ } else {
+ // These instructions don't support any other addressing modes.
+ VIXL_ABORT();
+ }
+ SVELdSt234Helper(num_regs, zt1, pg, addr, op);
+}
+
+#define VIXL_DEFINE_ST1(MSZ, LANE_SIZE) \
+ void Assembler::st1##MSZ(const ZRegister& zt, \
+ const PRegister& pg, \
+ const SVEMemOperand& addr) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ SVESt1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr); \
+ }
+#define VIXL_DEFINE_ST2(MSZ, LANE_SIZE) \
+ void Assembler::st2##MSZ(const ZRegister& zt1, \
+ const ZRegister& zt2, \
+ const PRegister& pg, \
+ const SVEMemOperand& addr) { \
+ USE(zt2); \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ VIXL_ASSERT(AreConsecutive(zt1, zt2)); \
+ VIXL_ASSERT(AreSameFormat(zt1, zt2)); \
+ VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \
+ SVESt234Helper(2, zt1, pg, addr); \
+ }
+#define VIXL_DEFINE_ST3(MSZ, LANE_SIZE) \
+ void Assembler::st3##MSZ(const ZRegister& zt1, \
+ const ZRegister& zt2, \
+ const ZRegister& zt3, \
+ const PRegister& pg, \
+ const SVEMemOperand& addr) { \
+ USE(zt2, zt3); \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3)); \
+ VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3)); \
+ VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \
+ SVESt234Helper(3, zt1, pg, addr); \
+ }
+#define VIXL_DEFINE_ST4(MSZ, LANE_SIZE) \
+ void Assembler::st4##MSZ(const ZRegister& zt1, \
+ const ZRegister& zt2, \
+ const ZRegister& zt3, \
+ const ZRegister& zt4, \
+ const PRegister& pg, \
+ const SVEMemOperand& addr) { \
+ USE(zt2, zt3, zt4); \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \
+ VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3, zt4)); \
+ VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3, zt4)); \
+ VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \
+ SVESt234Helper(4, zt1, pg, addr); \
+ }
+
+VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST1)
+VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST2)
+VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST3)
+VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST4)
+
+void Assembler::stnt1b(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0)));
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ STNT1B_z_p_br_contiguous,
+ STNT1B_z_p_bi_contiguous);
+}
+
+void Assembler::stnt1d(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3)));
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ STNT1D_z_p_br_contiguous,
+ STNT1D_z_p_bi_contiguous);
+}
+
+void Assembler::stnt1h(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1)));
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ STNT1H_z_p_br_contiguous,
+ STNT1H_z_p_bi_contiguous);
+}
+
+void Assembler::stnt1w(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2)));
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ STNT1W_z_p_br_contiguous,
+ STNT1W_z_p_bi_contiguous);
+}
+
+void Assembler::str(const CPURegister& rt, const SVEMemOperand& addr) {
+ // STR <Pt/Zt>, [<Xn|SP>{, #<imm>, MUL VL}]
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister());
+ VIXL_ASSERT(addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() &&
+ (addr.GetOffsetModifier() == SVE_MUL_VL)));
+ int64_t imm9 = addr.GetImmediateOffset();
+ VIXL_ASSERT(IsInt9(imm9));
+ Instr imm9l = ExtractUnsignedBitfield32(2, 0, imm9) << 10;
+ Instr imm9h = ExtractUnsignedBitfield32(8, 3, imm9) << 16;
+
+ Instr op = STR_z_bi;
+ if (rt.IsPRegister()) {
+ op = STR_p_bi;
+ }
+ Emit(op | Rt(rt) | RnSP(addr.GetScalarBase()) | imm9h | imm9l);
+}
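+
+// Worked example of the immediate split above: `str z0, [x0, #-3, MUL VL]`
+// has imm9 = -3 = 0b111111101 (nine bits, two's complement), so imm9l takes
+// the low three bits (0b101) at <12:10> and imm9h takes the remaining six
+// bits (0b111111) at <21:16>.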
+
+// SVEMulIndex.
+
+void Assembler::sdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+
+ Instr op = 0xffffffff;
+ switch (zda.GetLaneSizeInBits()) {
+ case kSRegSize:
+ VIXL_ASSERT(IsUint2(index));
+ op = SDOT_z_zzzi_s | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn);
+ break;
+ case kDRegSize:
+ VIXL_ASSERT(IsUint1(index));
+ op = SDOT_z_zzzi_d | Rx<19, 16>(zm) | (index << 20) | Rd(zda) | Rn(zn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ Emit(op);
+}
+
+void Assembler::udot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+
+ Instr op = 0xffffffff;
+ switch (zda.GetLaneSizeInBits()) {
+ case kSRegSize:
+ VIXL_ASSERT(IsUint2(index));
+ op = UDOT_z_zzzi_s | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn);
+ break;
+ case kDRegSize:
+ VIXL_ASSERT(IsUint1(index));
+ op = UDOT_z_zzzi_d | Rx<19, 16>(zm) | (index << 20) | Rd(zda) | Rn(zn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ Emit(op);
+}
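+
+// The indexed dot products above require the accumulator lanes to be four
+// times the size of the source lanes, and the valid index range depends on
+// the accumulator lane size: 0-3 for S and 0-1 for D (the D form also has a
+// wider Zm field). A usage sketch, given an Assembler `assm` and
+// illustrative registers:
+//
+//   assm.sdot(z0.VnS(), z1.VnB(), z2.VnB(), 3);  // S accumulator, B sources.
+//   assm.udot(z3.VnD(), z4.VnH(), z5.VnH(), 1);  // D accumulator, H sources.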
+
+// SVEPartitionBreak.
+
+void Assembler::brka(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing());
+ VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB());
+
+ Instr m = pg.IsMerging() ? 0x00000010 : 0x00000000;
+ Emit(BRKA_p_p_p | Pd(pd) | Pg<13, 10>(pg) | m | Pn(pn));
+}
+
+void Assembler::brkas(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB());
+
+ Emit(BRKAS_p_p_p_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn));
+}
+
+void Assembler::brkb(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing());
+ VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB());
+
+ Instr m = pg.IsMerging() ? 0x00000010 : 0x00000000;
+ Emit(BRKB_p_p_p | Pd(pd) | Pg<13, 10>(pg) | m | Pn(pn));
+}
+
+void Assembler::brkbs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB());
+
+ Emit(BRKBS_p_p_p_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn));
+}
+
+void Assembler::brkn(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ USE(pm);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB());
+ VIXL_ASSERT(pd.Is(pm));
+
+ Emit(BRKN_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn));
+}
+
+void Assembler::brkns(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ USE(pm);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB());
+ VIXL_ASSERT(pd.Is(pm));
+
+ Emit(BRKNS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn));
+}
+
+// SVEPermutePredicate.
+
+void Assembler::punpkhi(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn) {
+ // PUNPKHI <Pd>.H, <Pn>.B
+ // 0000 0101 0011 0001 0100 000. ...0 ....
+ // H<16> = 1 | Pn<8:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pd.IsLaneSizeH());
+ VIXL_ASSERT(pn.IsLaneSizeB());
+
+ Emit(PUNPKHI_p_p | Pd(pd) | Pn(pn));
+}
+
+void Assembler::punpklo(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn) {
+ // PUNPKLO <Pd>.H, <Pn>.B
+ // 0000 0101 0011 0000 0100 000. ...0 ....
+ // H<16> = 0 | Pn<8:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pd.IsLaneSizeH());
+ VIXL_ASSERT(pn.IsLaneSizeB());
+
+ Emit(PUNPKLO_p_p | Pd(pd) | Pn(pn));
+}
+
+void Assembler::rev(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn) {
+ // REV <Pd>.<T>, <Pn>.<T>
+ // 0000 0101 ..11 0100 0100 000. ...0 ....
+ // size<23:22> | Pn<8:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, pn));
+
+ Emit(REV_p_p | SVESize(pd) | Pd(pd) | Rx<8, 5>(pn));
+}
+
+void Assembler::trn1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
+ // 0000 0101 ..10 .... 0101 000. ...0 ....
+ // size<23:22> | Pm<19:16> | opc<12:11> = 10 | H<10> = 0 | Pn<8:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, pn, pm));
+
+ Emit(TRN1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::trn2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ // TRN2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
+ // 0000 0101 ..10 .... 0101 010. ...0 ....
+ // size<23:22> | Pm<19:16> | opc<12:11> = 10 | H<10> = 1 | Pn<8:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, pn, pm));
+
+ Emit(TRN2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::uzp1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ // UZP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
+ // 0000 0101 ..10 .... 0100 100. ...0 ....
+ // size<23:22> | Pm<19:16> | opc<12:11> = 01 | H<10> = 0 | Pn<8:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, pn, pm));
+
+ Emit(UZP1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::uzp2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ // UZP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
+ // 0000 0101 ..10 .... 0100 110. ...0 ....
+ // size<23:22> | Pm<19:16> | opc<12:11> = 01 | H<10> = 1 | Pn<8:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, pn, pm));
+
+ Emit(UZP2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::zip1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ // ZIP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
+ // 0000 0101 ..10 .... 0100 000. ...0 ....
+ // size<23:22> | Pm<19:16> | opc<12:11> = 00 | H<10> = 0 | Pn<8:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, pn, pm));
+
+ Emit(ZIP1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::zip2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ // ZIP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T>
+ // 0000 0101 ..10 .... 0100 010. ...0 ....
+ // size<23:22> | Pm<19:16> | opc<12:11> = 00 | H<10> = 1 | Pn<8:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(pd, pn, pm));
+
+ Emit(ZIP2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm));
+}
+
+// SVEPermuteVectorExtract.
+
+void Assembler::ext(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ unsigned offset) {
+ // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm>
+ // 0000 0101 001. .... 000. .... .... ....
+ // imm8h<20:16> | imm8l<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(IsUint8(offset));
+
+ int imm8h = ExtractUnsignedBitfield32(7, 3, offset);
+ int imm8l = ExtractUnsignedBitfield32(2, 0, offset);
+ Emit(EXT_z_zi_des | Rd(zd) | Rn(zm) | ImmUnsignedField<20, 16>(imm8h) |
+ ImmUnsignedField<12, 10>(imm8l));
+}
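+
+// Worked example of the offset split above: for `ext z0.b, z0.b, z1.b, #13`,
+// offset = 13 = 0b00001101, so imm8h = 0b00001 (bits <7:3>) and
+// imm8l = 0b101 (bits <2:0>).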
+
+// SVEPermuteVectorInterleaving.
+
+void Assembler::trn1(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // TRN1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0101 ..1. .... 0111 00.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 100 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(TRN1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::trn2(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // TRN2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0101 ..1. .... 0111 01.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 101 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(TRN2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uzp1(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UZP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0101 ..1. .... 0110 10.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 010 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(UZP1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uzp2(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UZP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0101 ..1. .... 0110 11.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 011 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(UZP2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::zip1(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ZIP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0101 ..1. .... 0110 00.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(ZIP1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::zip2(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ZIP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0101 ..1. .... 0110 01.. .... ....
+ // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(ZIP2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// SVEPermuteVectorPredicated.
+
+void Assembler::clasta(const Register& rd,
+ const PRegister& pg,
+ const Register& rn,
+ const ZRegister& zm) {
+ // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T>
+ // 0000 0101 ..11 0000 101. .... .... ....
+ // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Rdn<4:0>
+
+ USE(rn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(rd.Is(rn));
+
+ Emit(CLASTA_r_p_z | SVESize(zm) | Rd(rd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::clasta(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm) {
+ // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T>
+ // 0000 0101 ..10 1010 100. .... .... ....
+ // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Vdn<4:0>
+
+ USE(vn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.Is(vn));
+ VIXL_ASSERT(vd.IsScalar());
+ VIXL_ASSERT(AreSameLaneSize(vd, zm));
+
+ Emit(CLASTA_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::clasta(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0101 ..10 1000 100. .... .... ....
+ // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(CLASTA_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::clastb(const Register& rd,
+ const PRegister& pg,
+ const Register& rn,
+ const ZRegister& zm) {
+ // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T>
+ // 0000 0101 ..11 0001 101. .... .... ....
+ // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Rdn<4:0>
+
+ USE(rn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(rd.Is(rn));
+
+ Emit(CLASTB_r_p_z | SVESize(zm) | Rd(rd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::clastb(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm) {
+ // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T>
+ // 0000 0101 ..10 1011 100. .... .... ....
+ // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Vdn<4:0>
+
+ USE(vn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.Is(vn));
+ VIXL_ASSERT(vd.IsScalar());
+ VIXL_ASSERT(AreSameLaneSize(vd, zm));
+
+ Emit(CLASTB_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::clastb(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0101 ..10 1001 100. .... .... ....
+ // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(CLASTB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::compact(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T>
+ // 0000 0101 1.10 0001 100. .... .... ....
+ // sz<22> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT((zd.GetLaneSizeInBits() == kSRegSize) ||
+ (zd.GetLaneSizeInBits() == kDRegSize));
+
+ Instr sz = (zd.GetLaneSizeInBits() == kDRegSize) ? (1 << 22) : 0;
+ Emit(COMPACT_z_p_z | sz | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::cpy(const ZRegister& zd,
+ const PRegisterM& pg,
+ const Register& rn) {
+ // CPY <Zd>.<T>, <Pg>/M, <R><n|SP>
+ // 0000 0101 ..10 1000 101. .... .... ....
+ // size<23:22> | Pg<12:10> | Rn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >=
+ zd.GetLaneSizeInBits());
+
+ Emit(CPY_z_p_r | SVESize(zd) | Rd(zd) | PgLow8(pg) | RnSP(rn));
+}
+
+void Assembler::cpy(const ZRegister& zd,
+ const PRegisterM& pg,
+ const VRegister& vn) {
+ // CPY <Zd>.<T>, <Pg>/M, <V><n>
+ // 0000 0101 ..10 0000 100. .... .... ....
+ // size<23:22> | Pg<12:10> | Vn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vn.IsScalar());
+ VIXL_ASSERT(static_cast<unsigned>(vn.GetSizeInBits()) ==
+ zd.GetLaneSizeInBits());
+
+ Emit(CPY_z_p_v | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(vn));
+}
+
+void Assembler::lasta(const Register& rd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ // LASTA <R><d>, <Pg>, <Zn>.<T>
+ // 0000 0101 ..10 0000 101. .... .... ....
+ // size<23:22> | B<16> = 0 | Pg<12:10> | Zn<9:5> | Rd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LASTA_r_p_z | SVESize(zn) | Rd(rd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::lasta(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ // LASTA <V><d>, <Pg>, <Zn>.<T>
+ // 0000 0101 ..10 0010 100. .... .... ....
+ // size<23:22> | B<16> = 0 | Pg<12:10> | Zn<9:5> | Vd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+
+ Emit(LASTA_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::lastb(const Register& rd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ // LASTB <R><d>, <Pg>, <Zn>.<T>
+ // 0000 0101 ..10 0001 101. .... .... ....
+ // size<23:22> | B<16> = 1 | Pg<12:10> | Zn<9:5> | Rd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(LASTB_r_p_z | SVESize(zn) | Rd(rd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::lastb(const VRegister& vd,
+ const PRegister& pg,
+ const ZRegister& zn) {
+ // LASTB <V><d>, <Pg>, <Zn>.<T>
+ // 0000 0101 ..10 0011 100. .... .... ....
+ // size<23:22> | B<16> = 1 | Pg<12:10> | Zn<9:5> | Vd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vd.IsScalar());
+
+ Emit(LASTB_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::rbit(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0101 ..10 0111 100. .... .... ....
+ // size<23:22> | opc<17:16> = 11 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(RBIT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::revb(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0101 ..10 0100 100. .... .... ....
+ // size<23:22> | opc<17:16> = 00 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.IsLaneSizeH() || zd.IsLaneSizeS() || zd.IsLaneSizeD());
+
+ Emit(REVB_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::revh(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0000 0101 ..10 0101 100. .... .... ....
+ // size<23:22> | opc<17:16> = 01 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD());
+
+ Emit(REVH_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::revw(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // REVW <Zd>.D, <Pg>/M, <Zn>.D
+ // 0000 0101 ..10 0110 100. .... .... ....
+ // size<23:22> | opc<17:16> = 10 | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.IsLaneSizeD());
+
+ Emit(REVW_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::splice(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SPLICE <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0101 ..10 1100 100. .... .... ....
+ // size<23:22> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(SPLICE_z_p_zz_des | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+// SVEPermuteVectorUnpredicated.
+
+void Assembler::dup(const ZRegister& zd, const Register& xn) {
+ // DUP <Zd>.<T>, <R><n|SP>
+ // 0000 0101 ..10 0000 0011 10.. .... ....
+ // size<23:22> | Rn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(DUP_z_r | SVESize(zd) | Rd(zd) | RnSP(xn));
+}
+
+void Assembler::dup(const ZRegister& zd, const ZRegister& zn, unsigned index) {
+ // DUP <Zd>.<T>, <Zn>.<T>[<imm>]
+ // 0000 0101 ..1. .... 0010 00.. .... ....
+ // imm2<23:22> | tsz<20:16> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(zd, zn));
+ VIXL_ASSERT((index * zd.GetLaneSizeInBits()) < 512);
+ int n = zd.GetLaneSizeInBytesLog2();
+ unsigned imm_7 = (index << (n + 1)) | (1 << n);
+ VIXL_ASSERT(IsUint7(imm_7));
+ unsigned imm_2 = ExtractUnsignedBitfield32(6, 5, imm_7);
+ unsigned tsz_5 = ExtractUnsignedBitfield32(4, 0, imm_7);
+
+ Emit(DUP_z_zi | ImmUnsignedField<23, 22>(imm_2) |
+ ImmUnsignedField<20, 16>(tsz_5) | Rd(zd) | Rn(zn));
+}
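+
+// Worked example of the index encoding above: for S lanes, n = 2, so an
+// index of 3 gives imm_7 = (3 << 3) | (1 << 2) = 0b0011100, which splits
+// into imm_2 = 0b00 (bits <6:5>) and tsz_5 = 0b11100 (bits <4:0>).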
+
+void Assembler::insr(const ZRegister& zdn, const Register& rm) {
+ // INSR <Zdn>.<T>, <R><m>
+ // 0000 0101 ..10 0100 0011 10.. .... ....
+ // size<23:22> | Rm<9:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(INSR_z_r | SVESize(zdn) | Rd(zdn) | Rn(rm));
+}
+
+void Assembler::insr(const ZRegister& zdn, const VRegister& vm) {
+ // INSR <Zdn>.<T>, <V><m>
+ // 0000 0101 ..11 0100 0011 10.. .... ....
+ // size<23:22> | Vm<9:5> | Zdn<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(vm.IsScalar());
+
+ Emit(INSR_z_v | SVESize(zdn) | Rd(zdn) | Rn(vm));
+}
+
+void Assembler::rev(const ZRegister& zd, const ZRegister& zn) {
+ // REV <Zd>.<T>, <Zn>.<T>
+ // 0000 0101 ..11 1000 0011 10.. .... ....
+ // size<23:22> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(zd, zn));
+
+ Emit(REV_z_z | SVESize(zd) | Rd(zd) | Rn(zn));
+}
+
+void Assembler::sunpkhi(const ZRegister& zd, const ZRegister& zn) {
+ // SUNPKHI <Zd>.<T>, <Zn>.<Tb>
+ // 0000 0101 ..11 0001 0011 10.. .... ....
+ // size<23:22> | U<17> = 0 | H<16> = 1 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(!zd.IsLaneSizeB());
+
+ Emit(SUNPKHI_z_z | SVESize(zd) | Rd(zd) | Rn(zn));
+}
+
+void Assembler::sunpklo(const ZRegister& zd, const ZRegister& zn) {
+ // SUNPKLO <Zd>.<T>, <Zn>.<Tb>
+ // 0000 0101 ..11 0000 0011 10.. .... ....
+ // size<23:22> | U<17> = 0 | H<16> = 0 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(!zd.IsLaneSizeB());
+
+ Emit(SUNPKLO_z_z | SVESize(zd) | Rd(zd) | Rn(zn));
+}
+
+void Assembler::tbl(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // TBL <Zd>.<T>, { <Zn>.<T> }, <Zm>.<T>
+ // 0000 0101 ..1. .... 0011 00.. .... ....
+ // size<23:22> | Zm<20:16> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(zd, zn, zm));
+
+ Emit(TBL_z_zz_1 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uunpkhi(const ZRegister& zd, const ZRegister& zn) {
+ // UUNPKHI <Zd>.<T>, <Zn>.<Tb>
+ // 0000 0101 ..11 0011 0011 10.. .... ....
+ // size<23:22> | U<17> = 1 | H<16> = 1 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(!zd.IsLaneSizeB());
+
+ Emit(UUNPKHI_z_z | SVESize(zd) | Rd(zd) | Rn(zn));
+}
+
+void Assembler::uunpklo(const ZRegister& zd, const ZRegister& zn) {
+ // UUNPKLO <Zd>.<T>, <Zn>.<Tb>
+ // 0000 0101 ..11 0010 0011 10.. .... ....
+ // size<23:22> | U<17> = 1 | H<16> = 0 | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(!zd.IsLaneSizeB());
+
+ Emit(UUNPKLO_z_z | SVESize(zd) | Rd(zd) | Rn(zn));
+}
+
+// SVEPredicateCount.
+
+void Assembler::cntp(const Register& xd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn) {
+ // CNTP <Xd>, <Pg>, <Pn>.<T>
+ // 0010 0101 ..10 0000 10.. ..0. .... ....
+ // size<23:22> | opc<18:16> = 000 | Pg<13:10> | o2<9> = 0 | Pn<8:5> | Rd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(xd.IsX());
+ VIXL_ASSERT(pg.IsUnqualified());
+ if (pg.HasLaneSize()) VIXL_ASSERT(AreSameFormat(pg, pn));
+
+ Emit(CNTP_r_p_p | SVESize(pn) | Rd(xd) | Pg<13, 10>(pg) | Pn(pn));
+}
+
+// SVEPredicateLogicalOp.
+void Assembler::and_(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(AND_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::ands(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(ANDS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::bic(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(BIC_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::bics(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(BICS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::eor(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(EOR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::eors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(EORS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::nand(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(NAND_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::nands(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(NANDS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::nor(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(NOR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::nors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(NORS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::orn(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(ORN_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::orns(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(ORNS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::orr(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(ORR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::orrs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameFormat(pd, pn, pm));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ Emit(ORRS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::sel(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ Emit(SEL_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+// SVEPredicateMisc.
+
+void Assembler::pfalse(const PRegisterWithLaneSize& pd) {
+ // PFALSE <Pd>.B
+ // 0010 0101 0001 1000 1110 0100 0000 ....
+ // op<23> = 0 | S<22> = 0 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ // Ignore the lane size, since it makes no difference to the operation.
+
+ Emit(PFALSE_p | Pd(pd));
+}
+
+void Assembler::pfirst(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn) {
+ // PFIRST <Pdn>.B, <Pg>, <Pdn>.B
+ // 0010 0101 0101 1000 1100 000. ...0 ....
+ // op<23> = 0 | S<22> = 1 | Pg<8:5> | Pdn<3:0>
+
+ USE(pn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pd.Is(pn));
+ VIXL_ASSERT(pd.IsLaneSizeB());
+
+ Emit(PFIRST_p_p_p | Pd(pd) | Pg<8, 5>(pg));
+}
+
+void Assembler::pnext(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn) {
+ // PNEXT <Pdn>.<T>, <Pg>, <Pdn>.<T>
+ // 0010 0101 ..01 1001 1100 010. ...0 ....
+ // size<23:22> | Pg<8:5> | Pdn<3:0>
+
+ USE(pn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pd.Is(pn));
+
+ Emit(PNEXT_p_p_p | SVESize(pd) | Pd(pd) | Pg<8, 5>(pg));
+}
+
+void Assembler::ptest(const PRegister& pg, const PRegisterWithLaneSize& pn) {
+ // PTEST <Pg>, <Pn>.B
+ // 0010 0101 0101 0000 11.. ..0. ...0 0000
+ // op<23> = 0 | S<22> = 1 | Pg<13:10> | Pn<8:5> | opc2<3:0> = 0000
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(pn.IsLaneSizeB());
+
+ Emit(PTEST_p_p | Pg<13, 10>(pg) | Rx<8, 5>(pn));
+}
+
+void Assembler::ptrue(const PRegisterWithLaneSize& pd, int pattern) {
+ // PTRUE <Pd>.<T>{, <pattern>}
+ // 0010 0101 ..01 1000 1110 00.. ...0 ....
+ // size<23:22> | S<16> = 0 | pattern<9:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(PTRUE_p_s | SVESize(pd) | Pd(pd) | ImmSVEPredicateConstraint(pattern));
+}
+
+void Assembler::ptrues(const PRegisterWithLaneSize& pd, int pattern) {
+ // PTRUES <Pd>.<T>{, <pattern>}
+ // 0010 0101 ..01 1001 1110 00.. ...0 ....
+ // size<23:22> | S<16> = 1 | pattern<9:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(PTRUES_p_s | SVESize(pd) | Pd(pd) | ImmSVEPredicateConstraint(pattern));
+}
+
+void Assembler::rdffr(const PRegisterWithLaneSize& pd) {
+ // RDFFR <Pd>.B
+ // 0010 0101 0001 1001 1111 0000 0000 ....
+ // op<23> = 0 | S<22> = 0 | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(RDFFR_p_f | Pd(pd));
+}
+
+void Assembler::rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) {
+ // RDFFR <Pd>.B, <Pg>/Z
+ // 0010 0101 0001 1000 1111 000. ...0 ....
+ // op<23> = 0 | S<22> = 0 | Pg<8:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(RDFFR_p_p_f | Pd(pd) | Pg<8, 5>(pg));
+}
+
+void Assembler::rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) {
+ // RDFFRS <Pd>.B, <Pg>/Z
+ // 0010 0101 0101 1000 1111 000. ...0 ....
+ // op<23> = 0 | S<22> = 1 | Pg<8:5> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(RDFFRS_p_p_f | Pd(pd) | Pg<8, 5>(pg));
+}
+
+// SVEPropagateBreak.
+
+void Assembler::brkpa(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ // BRKPA <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
+ // 0010 0101 0000 .... 11.. ..0. ...0 ....
+ // op<23> = 0 | S<22> = 0 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 0 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(BRKPA_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::brkpas(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ // BRKPAS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
+ // 0010 0101 0100 .... 11.. ..0. ...0 ....
+ // op<23> = 0 | S<22> = 1 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 0 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(BRKPAS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::brkpb(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ // BRKPB <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
+ // 0010 0101 0000 .... 11.. ..0. ...1 ....
+ // op<23> = 0 | S<22> = 0 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 1 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(BRKPB_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+void Assembler::brkpbs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ // BRKPBS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
+ // 0010 0101 0100 .... 11.. ..0. ...1 ....
+ // op<23> = 0 | S<22> = 1 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 1 |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(BRKPBS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm));
+}
+
+// SVEStackFrameAdjustment.
+
+void Assembler::addpl(const Register& xd, const Register& xn, int imm6) {
+ // ADDPL <Xd|SP>, <Xn|SP>, #<imm>
+ // 0000 0100 011. .... 0101 0... .... ....
+ // op<22> = 1 | Rn<20:16> | imm6<10:5> | Rd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(xd.IsX());
+ VIXL_ASSERT(xn.IsX());
+
+ Emit(ADDPL_r_ri | RdSP(xd) | RmSP(xn) | ImmField<10, 5>(imm6));
+}
+
+void Assembler::addvl(const Register& xd, const Register& xn, int imm6) {
+ // ADDVL <Xd|SP>, <Xn|SP>, #<imm>
+ // 0000 0100 001. .... 0101 0... .... ....
+ // op<22> = 0 | Rn<20:16> | imm6<10:5> | Rd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(xd.IsX());
+ VIXL_ASSERT(xn.IsX());
+
+ Emit(ADDVL_r_ri | RdSP(xd) | RmSP(xn) | ImmField<10, 5>(imm6));
+}
+
+// SVEStackFrameSize.
+
+void Assembler::rdvl(const Register& xd, int imm6) {
+ // RDVL <Xd>, #<imm>
+ // 0000 0100 1011 1111 0101 0... .... ....
+ // op<22> = 0 | opc2<20:16> = 11111 | imm6<10:5> | Rd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(xd.IsX());
+
+ Emit(RDVL_r_i | Rd(xd) | ImmField<10, 5>(imm6));
+}
+
+// SVEVectorSelect.
+
+void Assembler::sel(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(SEL_z_p_zz | SVESize(zd) | Rd(zd) | Pg<13, 10>(pg) | Rn(zn) | Rm(zm));
+}
+
+// SVEWriteFFR.
+
+void Assembler::setffr() {
+ // SETFFR
+ // 0010 0101 0010 1100 1001 0000 0000 0000
+ // opc<23:22> = 00
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(SETFFR_f);
+}
+
+void Assembler::wrffr(const PRegisterWithLaneSize& pn) {
+ // WRFFR <Pn>.B
+ // 0010 0101 0010 1000 1001 000. ...0 0000
+ // opc<23:22> = 00 | Pn<8:5>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+
+ Emit(WRFFR_f_p | Rx<8, 5>(pn));
+}
+
+// Aliases.
+
+void Assembler::bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ and_(zd, zn, ~imm);
+}
+
+void Assembler::eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ eor(zd, zn, ~imm);
+}
+
+void Assembler::orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ orr(zd, zn, ~imm);
+}
+
+
+void Assembler::fmov(const ZRegister& zd, const PRegisterM& pg, double imm) {
+ if (IsPositiveZero(imm)) {
+ cpy(zd, pg, 0);
+ } else {
+ fcpy(zd, pg, imm);
+ }
+}
+
+void Assembler::fmov(const ZRegister& zd, double imm) {
+ if (IsPositiveZero(imm)) {
+ dup(zd, imm);
+ } else {
+ fdup(zd, imm);
+ }
+}
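+
+// The fmov aliases above special-case +0.0 because it has no encoding in the
+// eight-bit floating-point immediate used by fcpy/fdup; it is materialised
+// as an integer zero instead. For example, given an Assembler `assm`:
+//
+//   assm.fmov(z0.VnS(), 0.0);  // Emitted as an integer dup of zero.
+//   assm.fmov(z0.VnS(), 1.5);  // Emitted as fdup with an FP8 immediate.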
+
+void Assembler::mov(const PRegister& pd, const PRegister& pn) {
+ // If the inputs carry a lane size, they must match.
+ VIXL_ASSERT((!pd.HasLaneSize() && !pn.HasLaneSize()) ||
+ AreSameLaneSize(pd, pn));
+ orr(pd.VnB(), pn.Zeroing(), pn.VnB(), pn.VnB());
+}
+
+void Assembler::mov(const PRegisterWithLaneSize& pd,
+ const PRegisterM& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ sel(pd, pg, pn, pd);
+}
+
+void Assembler::mov(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ and_(pd, pg, pn, pn);
+}
+
+void Assembler::mov(const ZRegister& zd,
+ const PRegister& pg,
+ int imm8,
+ int shift) {
+ VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing());
+ cpy(zd, pg, imm8, shift);
+}
+
+void Assembler::mov(const ZRegister& zd, const Register& xn) { dup(zd, xn); }
+
+void Assembler::mov(const ZRegister& zd, const VRegister& vn) {
+ VIXL_ASSERT(vn.IsScalar());
+ VIXL_ASSERT(AreSameLaneSize(zd, vn));
+ dup(zd, vn.Z().WithSameLaneSizeAs(vn), 0);
+}
+
+void Assembler::mov(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ orr(zd.VnD(), zn.VnD(), zn.VnD());
+}
+
+void Assembler::mov(const ZRegister& zd, const ZRegister& zn, unsigned index) {
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ dup(zd, zn, index);
+}
+
+void Assembler::mov(const ZRegister& zd,
+ const PRegisterM& pg,
+ const Register& rn) {
+ cpy(zd, pg, rn);
+}
+
+void Assembler::mov(const ZRegister& zd,
+ const PRegisterM& pg,
+ const VRegister& vn) {
+ VIXL_ASSERT(vn.IsScalar());
+ VIXL_ASSERT(AreSameLaneSize(zd, vn));
+ cpy(zd, pg, vn);
+}
+
+void Assembler::mov(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ sel(zd, pg, zn, zd);
+}
+
+void Assembler::mov(const ZRegister& zd, uint64_t imm) {
+ // Mov is an alias of dupm for certain values of imm. Whilst this matters in
+ // the disassembler, for the assembler, we don't distinguish between the
+ // two mnemonics, and simply call dupm.
+ dupm(zd, imm);
+}
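+
+// For example, given an Assembler `assm`, `assm.mov(z0.VnS(), 0x3f)` and
+// `assm.dupm(z0.VnS(), 0x3f)` produce identical encodings; only the
+// disassembly may print a different mnemonic for them.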
+
+void Assembler::mov(const ZRegister& zd, int imm8, int shift) {
+ dup(zd, imm8, shift);
+}
+
+void Assembler::movs(const PRegister& pd, const PRegister& pn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ orrs(pd.VnB(), pn.Zeroing(), pn.VnB(), pn.VnB());
+}
+
+void Assembler::movs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ ands(pd, pg, pn, pn);
+}
+
+void Assembler::not_(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ eor(pd, pg, pn, pg.VnB());
+}
+
+void Assembler::nots(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ eors(pd, pg, pn, pg.VnB());
+}
+
+} // namespace aarch64
+} // namespace vixl
diff --git a/src/aarch64/constants-aarch64.h b/src/aarch64/constants-aarch64.h
index 36f5568d..bf93918d 100644
--- a/src/aarch64/constants-aarch64.h
+++ b/src/aarch64/constants-aarch64.h
@@ -34,6 +34,8 @@ namespace aarch64 {
const unsigned kNumberOfRegisters = 32;
const unsigned kNumberOfVRegisters = 32;
+const unsigned kNumberOfZRegisters = kNumberOfVRegisters;
+const unsigned kNumberOfPRegisters = 16;
// Callee saved registers are x21-x30(lr).
const int kNumberOfCalleeSavedRegisters = 10;
const int kFirstCalleeSavedRegisterIndex = 21;
@@ -41,14 +43,34 @@ const int kFirstCalleeSavedRegisterIndex = 21;
// still caller-saved.
const int kNumberOfCalleeSavedFPRegisters = 8;
const int kFirstCalleeSavedFPRegisterIndex = 8;
+// All predicated instructions accept at least p0-p7 as the governing predicate.
+const unsigned kNumberOfGoverningPRegisters = 8;
// clang-format off
+#define AARCH64_P_REGISTER_CODE_LIST(R) \
+ R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \
+ R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15)
+
#define AARCH64_REGISTER_CODE_LIST(R) \
R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \
R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) \
R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \
R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)
+// SVE loads and stores use "w" instead of "s" for word-sized accesses, so the
+// mapping from the load/store variant to constants like k*RegSize is irregular.
+#define VIXL_SVE_LOAD_STORE_VARIANT_LIST(V) \
+ V(b, B) \
+ V(h, H) \
+ V(w, S) \
+ V(d, D)
+
+// Sign-extending loads don't have double-word variants.
+#define VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(V) \
+ V(b, B) \
+ V(h, H) \
+ V(w, S)
+
#define INSTRUCTION_FIELDS_LIST(V_) \
/* Register fields */ \
V_(Rd, 4, 0, ExtractBits) /* Destination register. */ \
@@ -59,6 +81,11 @@ V_(Ra, 14, 10, ExtractBits) /* Third source register. */ \
V_(Rt, 4, 0, ExtractBits) /* Load/store register. */ \
V_(Rt2, 14, 10, ExtractBits) /* Load/store second register. */ \
V_(Rs, 20, 16, ExtractBits) /* Exclusive access status. */ \
+V_(Pt, 3, 0, ExtractBits) /* Load/store register (p0-p7). */ \
+V_(Pd, 3, 0, ExtractBits) /* SVE destination predicate register. */ \
+V_(Pn, 8, 5, ExtractBits) /* SVE first source predicate register. */ \
+V_(Pm, 19, 16, ExtractBits) /* SVE second source predicate register.*/ \
+V_(PgLow8, 12, 10, ExtractBits) /* Governing predicate (p0-p7). */ \
\
/* Common bits */ \
V_(SixtyFourBits, 31, 31, ExtractBits) \
@@ -74,7 +101,7 @@ V_(ImmDPShift, 15, 10, ExtractBits) \
\
/* Add/subtract immediate */ \
V_(ImmAddSub, 21, 10, ExtractBits) \
-V_(ShiftAddSub, 23, 22, ExtractBits) \
+V_(ImmAddSubShift, 22, 22, ExtractBits) \
\
/* Add/subtract extend */ \
V_(ImmExtendShift, 12, 10, ExtractBits) \
@@ -177,7 +204,23 @@ V_(NEONCmode, 15, 12, ExtractBits) \
/* NEON Shift Immediate fields */ \
V_(ImmNEONImmhImmb, 22, 16, ExtractBits) \
V_(ImmNEONImmh, 22, 19, ExtractBits) \
-V_(ImmNEONImmb, 18, 16, ExtractBits)
+V_(ImmNEONImmb, 18, 16, ExtractBits) \
+ \
+/* SVE generic fields */ \
+V_(SVESize, 23, 22, ExtractBits) \
+V_(ImmSVEVLScale, 10, 5, ExtractSignedBits) \
+V_(ImmSVEIntWideSigned, 12, 5, ExtractSignedBits) \
+V_(ImmSVEIntWideUnsigned, 12, 5, ExtractBits) \
+V_(ImmSVEPredicateConstraint, 9, 5, ExtractBits) \
+ \
+/* SVE Bitwise Immediate bitfield */ \
+V_(SVEBitN, 17, 17, ExtractBits) \
+V_(SVEImmRotate, 16, 11, ExtractBits) \
+V_(SVEImmSetBits, 10, 5, ExtractBits) \
+ \
+V_(SVEImmPrefetchOperation, 3, 0, ExtractBits) \
+V_(SVEPrefetchHint, 3, 3, ExtractBits)
+
// clang-format on
#define SYSTEM_REGISTER_FIELDS_LIST(V_, M_) \
@@ -235,7 +278,22 @@ enum Condition {
// Aliases.
hs = cs, // C set Unsigned higher or same.
- lo = cc // C clear Unsigned lower.
+ lo = cc, // C clear Unsigned lower.
+
+ // Floating-point additional condition code.
+ uo, // Unordered comparison.
+
+ // SVE predicate condition aliases.
+ sve_none = eq, // No active elements were true.
+ sve_any = ne, // An active element was true.
+ sve_nlast = cs, // The last element was not true.
+ sve_last = cc, // The last element was true.
+ sve_first = mi, // The first element was true.
+ sve_nfrst = pl, // The first element was not true.
+ sve_pmore = hi, // An active element was true but not the last element.
+ sve_plast = ls, // The last active element was true or no active elements were true.
+ sve_tcont = ge, // CTERM termination condition not detected.
+ sve_tstop = lt // CTERM termination condition detected.
};
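+
+// These aliases let flag-setting SVE instructions be followed by ordinary
+// conditional branches under readable names. A sketch, assuming an Assembler
+// `assm`, a Label `loop` and illustrative registers:
+//
+//   assm.whilelt(p0.VnD(), x0, x1);  // Sets NZCV from the predicate result.
+//   assm.b(&loop, sve_first);        // Same encoding as b(&loop, mi).
+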
inline Condition InvertCondition(Condition cond) {
@@ -279,7 +337,12 @@ enum StatusFlags {
FPEqualFlag = ZCFlag,
FPLessThanFlag = NFlag,
FPGreaterThanFlag = CFlag,
- FPUnorderedFlag = CVFlag
+ FPUnorderedFlag = CVFlag,
+
+ // SVE condition flags.
+ SVEFirstFlag = NFlag,
+ SVENoneFlag = ZFlag,
+ SVENotLastFlag = CFlag
};
enum Shift {
@@ -303,6 +366,17 @@ enum Extend {
SXTX = 7
};
+enum SVEOffsetModifier {
+ NO_SVE_OFFSET_MODIFIER,
+ // Multiply (each element of) the offset by either the vector or predicate
+ // length, according to the context.
+ SVE_MUL_VL,
+ // Shift or extend modifiers (as in `Shift` or `Extend`).
+ SVE_LSL,
+ SVE_UXTW,
+ SVE_SXTW
+};
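+
+// These modifiers are carried by SVEMemOperand to describe SVE addressing
+// modes: SVE_MUL_VL scales an immediate offset by the vector (or predicate)
+// length, SVE_LSL shifts a scalar index by the access size, and
+// SVE_UXTW/SVE_SXTW extend unpacked 32-bit vector offsets. For example, an
+// operand for `[x0, #4, MUL VL]` can be written as (illustrative registers):
+//
+//   SVEMemOperand(x0, 4, SVE_MUL_VL)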
+
enum SystemHint {
NOP = 0,
YIELD = 1,
@@ -368,6 +442,12 @@ enum PrefetchOperation {
PSTL3STRM = 0x15
};
+constexpr bool IsNamedPrefetchOperation(int op) {
+ return ((op >= PLDL1KEEP) && (op <= PLDL3STRM)) ||
+ ((op >= PLIL1KEEP) && (op <= PLIL3STRM)) ||
+ ((op >= PSTL1KEEP) && (op <= PSTL3STRM));
+}
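+
+// For example, IsNamedPrefetchOperation(PLDL3STRM) and
+// IsNamedPrefetchOperation(PSTL1KEEP) are true, whilst the unallocated
+// encodings between the groups (0x6, 0x7, 0xe, 0xf) and values above
+// PSTL3STRM are not named operations.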
+
enum BType {
// Set when executing any instruction on a guarded page, except those cases
// listed below.
@@ -429,6 +509,36 @@ enum DataCacheOp {
ZVA = CacheOpEncoder<3, 7, 4, 1>::value
};
+// Some SVE instructions support a predicate constraint pattern. This is
+// interpreted as a VL-dependent value, and is typically used to initialise
+// predicates, or to otherwise limit the number of processed elements.
+enum SVEPredicateConstraint {
+ // Select 2^N elements, for the largest possible N.
+ SVE_POW2 = 0x0,
+ // Each VL<N> selects exactly N elements if possible, or zero if N is greater
+ // than the number of elements. Note that the encoding values for VL<N> are
+ // not linearly related to N.
+ SVE_VL1 = 0x1,
+ SVE_VL2 = 0x2,
+ SVE_VL3 = 0x3,
+ SVE_VL4 = 0x4,
+ SVE_VL5 = 0x5,
+ SVE_VL6 = 0x6,
+ SVE_VL7 = 0x7,
+ SVE_VL8 = 0x8,
+ SVE_VL16 = 0x9,
+ SVE_VL32 = 0xa,
+ SVE_VL64 = 0xb,
+ SVE_VL128 = 0xc,
+ SVE_VL256 = 0xd,
+ // Each MUL<N> selects the largest multiple of N elements that the vector
+ // length supports. Note that for D-sized lanes, this can be zero.
+ SVE_MUL4 = 0x1d,
+ SVE_MUL3 = 0x1e,
+ // Select all elements.
+ SVE_ALL = 0x1f
+};
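+
+// A pattern is typically passed to predicate-initialising instructions such
+// as ptrue. For example, given an Assembler `assm` and a 256-bit vector
+// length (illustrative):
+//
+//   assm.ptrue(p0.VnS(), SVE_VL4);   // First four of the eight S lanes.
+//   assm.ptrue(p1.VnD(), SVE_MUL3);  // Largest multiple of three D lanes (3).
+//   assm.ptrue(p2.VnB(), SVE_ALL);   // All 32 B lanes.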
+
// Instruction enumerations.
//
// These are the masks that define a class of instructions, and the list of
@@ -503,6 +613,14 @@ enum NEONScalarFormatField {
NEON_D = 0x00C00000
};
+enum SVESizeField {
+ SVESizeFieldMask = 0x00C00000,
+ SVE_B = 0x00000000,
+ SVE_H = 0x00400000,
+ SVE_S = 0x00800000,
+ SVE_D = 0x00C00000
+};
+
// PC relative addressing.
enum PCRelAddressingOp {
PCRelAddressingFixed = 0x10000000,
@@ -531,8 +649,8 @@ enum AddSubOp {
enum AddSubImmediateOp {
AddSubImmediateFixed = 0x11000000,
- AddSubImmediateFMask = 0x1F000000,
- AddSubImmediateMask = 0xFF000000,
+ AddSubImmediateFMask = 0x1F800000,
+ AddSubImmediateMask = 0xFF800000,
#define ADD_SUB_IMMEDIATE(A) \
A##_w_imm = AddSubImmediateFixed | A, \
A##_x_imm = AddSubImmediateFixed | A | SixtyFourBits
@@ -2660,11 +2778,1626 @@ enum NEONScalarShiftImmediateOp {
NEON_FCVTZU_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_imm
};
+enum SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsOp {
+ SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed = 0x84A00000,
+ SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08000,
+ SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E000,
+ LD1SH_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed,
+ LDFF1SH_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00002000,
+ LD1H_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00004000,
+ LDFF1H_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00006000
+};
+
+enum SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsOp {
+ SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed = 0x85200000,
+ SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08000,
+ SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E000,
+ LD1W_z_p_bz_s_x32_scaled = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed | 0x00004000,
+ LDFF1W_z_p_bz_s_x32_scaled = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed | 0x00006000
+};
+
+enum SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsOp {
+ SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed = 0x84000000,
+ SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFMask = 0xFE208000,
+ SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask = 0xFFA0E000,
+ LD1SB_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed,
+ LDFF1SB_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00002000,
+ LD1B_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00004000,
+ LDFF1B_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00006000,
+ LD1SH_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00800000,
+ LDFF1SH_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00802000,
+ LD1H_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00804000,
+ LDFF1H_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00806000,
+ LD1W_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x01004000,
+ LDFF1W_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x01006000
+};
+
+enum SVE32BitGatherLoad_VectorPlusImmOp {
+ SVE32BitGatherLoad_VectorPlusImmFixed = 0x84208000,
+ SVE32BitGatherLoad_VectorPlusImmFMask = 0xFE608000,
+ SVE32BitGatherLoad_VectorPlusImmMask = 0xFFE0E000,
+ LD1SB_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed,
+ LDFF1SB_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00002000,
+ LD1B_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00004000,
+ LDFF1B_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00006000,
+ LD1SH_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00800000,
+ LDFF1SH_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00802000,
+ LD1H_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00804000,
+ LDFF1H_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00806000,
+ LD1W_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x01004000,
+ LDFF1W_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x01006000
+};
+
+enum SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsOp {
+ SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed = 0x84200000,
+ SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08010,
+ SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E010,
+ PRFB_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed,
+ PRFH_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00002000,
+ PRFW_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00004000,
+ PRFD_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00006000
+};
+
+enum SVE32BitGatherPrefetch_VectorPlusImmOp {
+ SVE32BitGatherPrefetch_VectorPlusImmFixed = 0x8400E000,
+ SVE32BitGatherPrefetch_VectorPlusImmFMask = 0xFE60E010,
+ SVE32BitGatherPrefetch_VectorPlusImmMask = 0xFFE0E010,
+ PRFB_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed,
+ PRFH_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x00800000,
+ PRFW_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x01000000,
+ PRFD_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x01800000
+};
+
+enum SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsOp {
+ SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed = 0xE4608000,
+ SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFMask = 0xFE60A000,
+ SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask = 0xFFE0A000,
+ ST1H_z_p_bz_s_x32_scaled = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed | 0x00800000,
+ ST1W_z_p_bz_s_x32_scaled = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed | 0x01000000
+};
+
+enum SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsOp {
+ SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed = 0xE4408000,
+ SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFMask = 0xFE60A000,
+ SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask = 0xFFE0A000,
+ ST1B_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed,
+ ST1H_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed | 0x00800000,
+ ST1W_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed | 0x01000000
+};
+
+enum SVE32BitScatterStore_VectorPlusImmOp {
+ SVE32BitScatterStore_VectorPlusImmFixed = 0xE460A000,
+ SVE32BitScatterStore_VectorPlusImmFMask = 0xFE60E000,
+ SVE32BitScatterStore_VectorPlusImmMask = 0xFFE0E000,
+ ST1B_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed,
+ ST1H_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed | 0x00800000,
+ ST1W_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed | 0x01000000
+};
+
+enum SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsOp {
+ SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed = 0xC4200000,
+ SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFMask = 0xFE208000,
+ SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask = 0xFFA0E000,
+ LD1SH_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00800000,
+ LDFF1SH_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00802000,
+ LD1H_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00804000,
+ LDFF1H_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00806000,
+ LD1SW_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01000000,
+ LDFF1SW_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01002000,
+ LD1W_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01004000,
+ LDFF1W_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01006000,
+ LD1D_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01804000,
+ LDFF1D_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01806000
+};
+
+enum SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsOp {
+ SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed = 0xC4608000,
+ SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFMask = 0xFE608000,
+ SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E000,
+ LD1SH_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00800000,
+ LDFF1SH_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00802000,
+ LD1H_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00804000,
+ LDFF1H_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00806000,
+ LD1SW_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01000000,
+ LDFF1SW_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01002000,
+ LD1W_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01004000,
+ LDFF1W_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01006000,
+ LD1D_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01804000,
+ LDFF1D_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01806000
+};
+
+enum SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsOp {
+ SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed = 0xC4408000,
+ SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFMask = 0xFE608000,
+ SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask = 0xFFE0E000,
+ LD1SB_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed,
+ LDFF1SB_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00002000,
+ LD1B_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00004000,
+ LDFF1B_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00006000,
+ LD1SH_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00800000,
+ LDFF1SH_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00802000,
+ LD1H_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00804000,
+ LDFF1H_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00806000,
+ LD1SW_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01000000,
+ LDFF1SW_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01002000,
+ LD1W_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01004000,
+ LDFF1W_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01006000,
+ LD1D_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01804000,
+ LDFF1D_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01806000
+};
+
+enum SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsOp {
+ SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed = 0xC4000000,
+ SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFMask = 0xFE208000,
+ SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask = 0xFFA0E000,
+ LD1SB_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed,
+ LDFF1SB_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00002000,
+ LD1B_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00004000,
+ LDFF1B_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00006000,
+ LD1SH_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00800000,
+ LDFF1SH_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00802000,
+ LD1H_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00804000,
+ LDFF1H_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00806000,
+ LD1SW_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01000000,
+ LDFF1SW_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01002000,
+ LD1W_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01004000,
+ LDFF1W_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01006000,
+ LD1D_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01804000,
+ LDFF1D_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01806000
+};
+
+enum SVE64BitGatherLoad_VectorPlusImmOp {
+ SVE64BitGatherLoad_VectorPlusImmFixed = 0xC4208000,
+ SVE64BitGatherLoad_VectorPlusImmFMask = 0xFE608000,
+ SVE64BitGatherLoad_VectorPlusImmMask = 0xFFE0E000,
+ LD1SB_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed,
+ LDFF1SB_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00002000,
+ LD1B_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00004000,
+ LDFF1B_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00006000,
+ LD1SH_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00800000,
+ LDFF1SH_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00802000,
+ LD1H_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00804000,
+ LDFF1H_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00806000,
+ LD1SW_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01000000,
+ LDFF1SW_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01002000,
+ LD1W_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01004000,
+ LDFF1W_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01006000,
+ LD1D_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01804000,
+ LDFF1D_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01806000
+};
+
+enum SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsOp {
+ SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed = 0xC4608000,
+ SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFMask = 0xFFE08010,
+ SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E010,
+ PRFB_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed,
+ PRFH_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00002000,
+ PRFW_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00004000,
+ PRFD_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00006000
+};
+
+enum SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsOp {
+ SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed = 0xC4200000,
+ SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFMask = 0xFFA08010,
+ SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask = 0xFFA0E010,
+ PRFB_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed,
+ PRFH_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00002000,
+ PRFW_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00004000,
+ PRFD_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00006000
+};
+
+enum SVE64BitGatherPrefetch_VectorPlusImmOp {
+ SVE64BitGatherPrefetch_VectorPlusImmFixed = 0xC400E000,
+ SVE64BitGatherPrefetch_VectorPlusImmFMask = 0xFE60E010,
+ SVE64BitGatherPrefetch_VectorPlusImmMask = 0xFFE0E010,
+ PRFB_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed,
+ PRFH_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x00800000,
+ PRFW_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x01000000,
+ PRFD_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x01800000
+};
+
+enum SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsOp {
+ SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed = 0xE420A000,
+ SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFMask = 0xFE60E000,
+ SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E000,
+ ST1H_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x00800000,
+ ST1W_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x01000000,
+ ST1D_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x01800000
+};
+
+enum SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsOp {
+ SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed = 0xE400A000,
+ SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFMask = 0xFE60E000,
+ SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask = 0xFFE0E000,
+ ST1B_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed,
+ ST1H_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x00800000,
+ ST1W_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x01000000,
+ ST1D_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x01800000
+};
+
+enum SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsOp {
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed = 0xE4208000,
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFMask = 0xFE60A000,
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask = 0xFFE0A000,
+ ST1H_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00800000,
+ ST1W_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x01000000,
+ ST1D_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x01800000
+};
+
+enum SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsOp {
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed = 0xE4008000,
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFMask = 0xFE60A000,
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask = 0xFFE0A000,
+ ST1B_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed,
+ ST1H_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00800000,
+ ST1W_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01000000,
+ ST1D_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01800000
+};
+
+enum SVE64BitScatterStore_VectorPlusImmOp {
+ SVE64BitScatterStore_VectorPlusImmFixed = 0xE440A000,
+ SVE64BitScatterStore_VectorPlusImmFMask = 0xFE60E000,
+ SVE64BitScatterStore_VectorPlusImmMask = 0xFFE0E000,
+ ST1B_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed,
+ ST1H_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x00800000,
+ ST1W_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x01000000,
+ ST1D_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x01800000
+};
+
+enum SVEAddressGenerationOp {
+ SVEAddressGenerationFixed = 0x0420A000,
+ SVEAddressGenerationFMask = 0xFF20F000,
+ SVEAddressGenerationMask = 0xFFE0F000,
+ ADR_z_az_d_s32_scaled = SVEAddressGenerationFixed,
+ ADR_z_az_d_u32_scaled = SVEAddressGenerationFixed | 0x00400000,
+ ADR_z_az_s_same_scaled = SVEAddressGenerationFixed | 0x00800000,
+ ADR_z_az_d_same_scaled = SVEAddressGenerationFixed | 0x00C00000
+};
+
+enum SVEBitwiseLogicalUnpredicatedOp {
+ SVEBitwiseLogicalUnpredicatedFixed = 0x04202000,
+ SVEBitwiseLogicalUnpredicatedFMask = 0xFF20E000,
+ SVEBitwiseLogicalUnpredicatedMask = 0xFFE0FC00,
+ AND_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00001000,
+ ORR_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00401000,
+ EOR_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00801000,
+ BIC_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00C01000
+};
+
+enum SVEBitwiseLogicalWithImm_UnpredicatedOp {
+ SVEBitwiseLogicalWithImm_UnpredicatedFixed = 0x05000000,
+ SVEBitwiseLogicalWithImm_UnpredicatedFMask = 0xFF3C0000,
+ SVEBitwiseLogicalWithImm_UnpredicatedMask = 0xFFFC0000,
+ ORR_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed,
+ EOR_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed | 0x00400000,
+ AND_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed | 0x00800000
+};
+
+enum SVEBitwiseLogical_PredicatedOp {
+ SVEBitwiseLogical_PredicatedFixed = 0x04180000,
+ SVEBitwiseLogical_PredicatedFMask = 0xFF38E000,
+ SVEBitwiseLogical_PredicatedMask = 0xFF3FE000,
+ ORR_z_p_zz = SVEBitwiseLogical_PredicatedFixed,
+ EOR_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00010000,
+ AND_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00020000,
+ BIC_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00030000
+};
+
+enum SVEBitwiseShiftByImm_PredicatedOp {
+ SVEBitwiseShiftByImm_PredicatedFixed = 0x04008000,
+ SVEBitwiseShiftByImm_PredicatedFMask = 0xFF30E000,
+ SVEBitwiseShiftByImm_PredicatedMask = 0xFF3FE000,
+ ASR_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed,
+ LSR_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00010000,
+ LSL_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00030000,
+ ASRD_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00040000
+};
+
+enum SVEBitwiseShiftByVector_PredicatedOp {
+ SVEBitwiseShiftByVector_PredicatedFixed = 0x04108000,
+ SVEBitwiseShiftByVector_PredicatedFMask = 0xFF38E000,
+ SVEBitwiseShiftByVector_PredicatedMask = 0xFF3FE000,
+ ASR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed,
+ LSR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00010000,
+ LSL_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00030000,
+ ASRR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00040000,
+ LSRR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00050000,
+ LSLR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00070000
+};
+
+enum SVEBitwiseShiftByWideElements_PredicatedOp {
+ SVEBitwiseShiftByWideElements_PredicatedFixed = 0x04188000,
+ SVEBitwiseShiftByWideElements_PredicatedFMask = 0xFF38E000,
+ SVEBitwiseShiftByWideElements_PredicatedMask = 0xFF3FE000,
+ ASR_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed,
+ LSR_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed | 0x00010000,
+ LSL_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed | 0x00030000
+};
+
+enum SVEBitwiseShiftUnpredicatedOp {
+ SVEBitwiseShiftUnpredicatedFixed = 0x04208000,
+ SVEBitwiseShiftUnpredicatedFMask = 0xFF20E000,
+ SVEBitwiseShiftUnpredicatedMask = 0xFF20FC00,
+ ASR_z_zw = SVEBitwiseShiftUnpredicatedFixed,
+ LSR_z_zw = SVEBitwiseShiftUnpredicatedFixed | 0x00000400,
+ LSL_z_zw = SVEBitwiseShiftUnpredicatedFixed | 0x00000C00,
+ ASR_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001000,
+ LSR_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001400,
+ LSL_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001C00
+};
+
+enum SVEBroadcastBitmaskImmOp {
+ SVEBroadcastBitmaskImmFixed = 0x05C00000,
+ SVEBroadcastBitmaskImmFMask = 0xFFFC0000,
+ SVEBroadcastBitmaskImmMask = 0xFFFC0000,
+ DUPM_z_i = SVEBroadcastBitmaskImmFixed
+};
+
+enum SVEBroadcastFPImm_UnpredicatedOp {
+ SVEBroadcastFPImm_UnpredicatedFixed = 0x2539C000,
+ SVEBroadcastFPImm_UnpredicatedFMask = 0xFF39C000,
+ SVEBroadcastFPImm_UnpredicatedMask = 0xFF3FE000,
+ FDUP_z_i = SVEBroadcastFPImm_UnpredicatedFixed
+};
+
+enum SVEBroadcastGeneralRegisterOp {
+ SVEBroadcastGeneralRegisterFixed = 0x05203800,
+ SVEBroadcastGeneralRegisterFMask = 0xFF3FFC00,
+ SVEBroadcastGeneralRegisterMask = 0xFF3FFC00,
+ DUP_z_r = SVEBroadcastGeneralRegisterFixed
+};
+
+enum SVEBroadcastIndexElementOp {
+ SVEBroadcastIndexElementFixed = 0x05202000,
+ SVEBroadcastIndexElementFMask = 0xFF20FC00,
+ SVEBroadcastIndexElementMask = 0xFF20FC00,
+ DUP_z_zi = SVEBroadcastIndexElementFixed
+};
+
+enum SVEBroadcastIntImm_UnpredicatedOp {
+ SVEBroadcastIntImm_UnpredicatedFixed = 0x2538C000,
+ SVEBroadcastIntImm_UnpredicatedFMask = 0xFF39C000,
+ SVEBroadcastIntImm_UnpredicatedMask = 0xFF3FC000,
+ DUP_z_i = SVEBroadcastIntImm_UnpredicatedFixed
+};
+
+enum SVECompressActiveElementsOp {
+ SVECompressActiveElementsFixed = 0x05A18000,
+ SVECompressActiveElementsFMask = 0xFFBFE000,
+ SVECompressActiveElementsMask = 0xFFBFE000,
+ COMPACT_z_p_z = SVECompressActiveElementsFixed
+};
+
+enum SVEConditionallyBroadcastElementToVectorOp {
+ SVEConditionallyBroadcastElementToVectorFixed = 0x05288000,
+ SVEConditionallyBroadcastElementToVectorFMask = 0xFF3EE000,
+ SVEConditionallyBroadcastElementToVectorMask = 0xFF3FE000,
+ CLASTA_z_p_zz = SVEConditionallyBroadcastElementToVectorFixed,
+ CLASTB_z_p_zz = SVEConditionallyBroadcastElementToVectorFixed | 0x00010000
+};
+
+enum SVEConditionallyExtractElementToGeneralRegisterOp {
+ SVEConditionallyExtractElementToGeneralRegisterFixed = 0x0530A000,
+ SVEConditionallyExtractElementToGeneralRegisterFMask = 0xFF3EE000,
+ SVEConditionallyExtractElementToGeneralRegisterMask = 0xFF3FE000,
+ CLASTA_r_p_z = SVEConditionallyExtractElementToGeneralRegisterFixed,
+ CLASTB_r_p_z = SVEConditionallyExtractElementToGeneralRegisterFixed | 0x00010000
+};
+
+enum SVEConditionallyExtractElementToSIMDFPScalarOp {
+ SVEConditionallyExtractElementToSIMDFPScalarFixed = 0x052A8000,
+ SVEConditionallyExtractElementToSIMDFPScalarFMask = 0xFF3EE000,
+ SVEConditionallyExtractElementToSIMDFPScalarMask = 0xFF3FE000,
+ CLASTA_v_p_z = SVEConditionallyExtractElementToSIMDFPScalarFixed,
+ CLASTB_v_p_z = SVEConditionallyExtractElementToSIMDFPScalarFixed | 0x00010000
+};
+
+enum SVEConditionallyTerminateScalarsOp {
+ SVEConditionallyTerminateScalarsFixed = 0x25202000,
+ SVEConditionallyTerminateScalarsFMask = 0xFF20FC0F,
+ SVEConditionallyTerminateScalarsMask = 0xFFA0FC1F,
+ CTERMEQ_rr = SVEConditionallyTerminateScalarsFixed | 0x00800000,
+ CTERMNE_rr = SVEConditionallyTerminateScalarsFixed | 0x00800010
+};
+
+enum SVEConstructivePrefix_UnpredicatedOp {
+ SVEConstructivePrefix_UnpredicatedFixed = 0x0420BC00,
+ SVEConstructivePrefix_UnpredicatedFMask = 0xFF20FC00,
+ SVEConstructivePrefix_UnpredicatedMask = 0xFFFFFC00,
+ MOVPRFX_z_z = SVEConstructivePrefix_UnpredicatedFixed
+};
+
+enum SVEContiguousFirstFaultLoad_ScalarPlusScalarOp {
+ SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed = 0xA4006000,
+ SVEContiguousFirstFaultLoad_ScalarPlusScalarFMask = 0xFE00E000,
+ SVEContiguousFirstFaultLoad_ScalarPlusScalarMask = 0xFFE0E000,
+ LDFF1B_z_p_br_u8 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed,
+ LDFF1B_z_p_br_u16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00200000,
+ LDFF1B_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00400000,
+ LDFF1B_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00600000,
+ LDFF1SW_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00800000,
+ LDFF1H_z_p_br_u16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00A00000,
+ LDFF1H_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00C00000,
+ LDFF1H_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00E00000,
+ LDFF1SH_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01000000,
+ LDFF1SH_z_p_br_s32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01200000,
+ LDFF1W_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01400000,
+ LDFF1W_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01600000,
+ LDFF1SB_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01800000,
+ LDFF1SB_z_p_br_s32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01A00000,
+ LDFF1SB_z_p_br_s16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01C00000,
+ LDFF1D_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVEContiguousLoad_ScalarPlusImmOp {
+ SVEContiguousLoad_ScalarPlusImmFixed = 0xA400A000,
+ SVEContiguousLoad_ScalarPlusImmFMask = 0xFE10E000,
+ SVEContiguousLoad_ScalarPlusImmMask = 0xFFF0E000,
+ LD1B_z_p_bi_u8 = SVEContiguousLoad_ScalarPlusImmFixed,
+ LD1B_z_p_bi_u16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00200000,
+ LD1B_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00400000,
+ LD1B_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00600000,
+ LD1SW_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00800000,
+ LD1H_z_p_bi_u16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00A00000,
+ LD1H_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00C00000,
+ LD1H_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00E00000,
+ LD1SH_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01000000,
+ LD1SH_z_p_bi_s32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01200000,
+ LD1W_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01400000,
+ LD1W_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01600000,
+ LD1SB_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01800000,
+ LD1SB_z_p_bi_s32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01A00000,
+ LD1SB_z_p_bi_s16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01C00000,
+ LD1D_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVEContiguousLoad_ScalarPlusScalarOp {
+ SVEContiguousLoad_ScalarPlusScalarFixed = 0xA4004000,
+ SVEContiguousLoad_ScalarPlusScalarFMask = 0xFE00E000,
+ SVEContiguousLoad_ScalarPlusScalarMask = 0xFFE0E000,
+ LD1B_z_p_br_u8 = SVEContiguousLoad_ScalarPlusScalarFixed,
+ LD1B_z_p_br_u16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00200000,
+ LD1B_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00400000,
+ LD1B_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00600000,
+ LD1SW_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00800000,
+ LD1H_z_p_br_u16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00A00000,
+ LD1H_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00C00000,
+ LD1H_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00E00000,
+ LD1SH_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01000000,
+ LD1SH_z_p_br_s32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01200000,
+ LD1W_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01400000,
+ LD1W_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01600000,
+ LD1SB_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01800000,
+ LD1SB_z_p_br_s32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01A00000,
+ LD1SB_z_p_br_s16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01C00000,
+ LD1D_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVEContiguousNonFaultLoad_ScalarPlusImmOp {
+ SVEContiguousNonFaultLoad_ScalarPlusImmFixed = 0xA410A000,
+ SVEContiguousNonFaultLoad_ScalarPlusImmFMask = 0xFE10E000,
+ SVEContiguousNonFaultLoad_ScalarPlusImmMask = 0xFFF0E000,
+ LDNF1B_z_p_bi_u8 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed,
+ LDNF1B_z_p_bi_u16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00200000,
+ LDNF1B_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00400000,
+ LDNF1B_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00600000,
+ LDNF1SW_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00800000,
+ LDNF1H_z_p_bi_u16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00A00000,
+ LDNF1H_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00C00000,
+ LDNF1H_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00E00000,
+ LDNF1SH_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01000000,
+ LDNF1SH_z_p_bi_s32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01200000,
+ LDNF1W_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01400000,
+ LDNF1W_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01600000,
+ LDNF1SB_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01800000,
+ LDNF1SB_z_p_bi_s32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01A00000,
+ LDNF1SB_z_p_bi_s16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01C00000,
+ LDNF1D_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVEContiguousNonTemporalLoad_ScalarPlusImmOp {
+ SVEContiguousNonTemporalLoad_ScalarPlusImmFixed = 0xA400E000,
+ SVEContiguousNonTemporalLoad_ScalarPlusImmFMask = 0xFE70E000,
+ SVEContiguousNonTemporalLoad_ScalarPlusImmMask = 0xFFF0E000,
+ LDNT1B_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed,
+ LDNT1H_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x00800000,
+ LDNT1W_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x01000000,
+ LDNT1D_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVEContiguousNonTemporalLoad_ScalarPlusScalarOp {
+ SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed = 0xA400C000,
+ SVEContiguousNonTemporalLoad_ScalarPlusScalarFMask = 0xFE60E000,
+ SVEContiguousNonTemporalLoad_ScalarPlusScalarMask = 0xFFE0E000,
+ LDNT1B_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed,
+ LDNT1H_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x00800000,
+ LDNT1W_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x01000000,
+ LDNT1D_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVEContiguousNonTemporalStore_ScalarPlusImmOp {
+ SVEContiguousNonTemporalStore_ScalarPlusImmFixed = 0xE410E000,
+ SVEContiguousNonTemporalStore_ScalarPlusImmFMask = 0xFE70E000,
+ SVEContiguousNonTemporalStore_ScalarPlusImmMask = 0xFFF0E000,
+ STNT1B_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed,
+ STNT1H_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x00800000,
+ STNT1W_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x01000000,
+ STNT1D_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVEContiguousNonTemporalStore_ScalarPlusScalarOp {
+ SVEContiguousNonTemporalStore_ScalarPlusScalarFixed = 0xE4006000,
+ SVEContiguousNonTemporalStore_ScalarPlusScalarFMask = 0xFE60E000,
+ SVEContiguousNonTemporalStore_ScalarPlusScalarMask = 0xFFE0E000,
+ STNT1B_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed,
+ STNT1H_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x00800000,
+ STNT1W_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x01000000,
+ STNT1D_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVEContiguousPrefetch_ScalarPlusImmOp {
+ SVEContiguousPrefetch_ScalarPlusImmFixed = 0x85C00000,
+ SVEContiguousPrefetch_ScalarPlusImmFMask = 0xFFC08010,
+ SVEContiguousPrefetch_ScalarPlusImmMask = 0xFFC0E010,
+ PRFB_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed,
+ PRFH_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00002000,
+ PRFW_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00004000,
+ PRFD_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00006000
+};
+
+enum SVEContiguousPrefetch_ScalarPlusScalarOp {
+ SVEContiguousPrefetch_ScalarPlusScalarFixed = 0x8400C000,
+ SVEContiguousPrefetch_ScalarPlusScalarFMask = 0xFE60E010,
+ SVEContiguousPrefetch_ScalarPlusScalarMask = 0xFFE0E010,
+ PRFB_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed,
+ PRFH_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x00800000,
+ PRFW_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x01000000,
+ PRFD_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVEContiguousStore_ScalarPlusImmOp {
+ SVEContiguousStore_ScalarPlusImmFixed = 0xE400E000,
+ SVEContiguousStore_ScalarPlusImmFMask = 0xFE10E000,
+ SVEContiguousStore_ScalarPlusImmMask = 0xFF90E000,
+ ST1B_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed,
+ ST1H_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x00800000,
+ ST1W_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x01000000,
+ ST1D_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVEContiguousStore_ScalarPlusScalarOp {
+ SVEContiguousStore_ScalarPlusScalarFixed = 0xE4004000,
+ SVEContiguousStore_ScalarPlusScalarFMask = 0xFE00E000,
+ SVEContiguousStore_ScalarPlusScalarMask = 0xFF80E000,
+ ST1B_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed,
+ ST1H_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x00800000,
+ ST1W_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x01000000,
+ ST1D_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVECopyFPImm_PredicatedOp {
+ SVECopyFPImm_PredicatedFixed = 0x0510C000,
+ SVECopyFPImm_PredicatedFMask = 0xFF30E000,
+ SVECopyFPImm_PredicatedMask = 0xFF30E000,
+ FCPY_z_p_i = SVECopyFPImm_PredicatedFixed
+};
+
+enum SVECopyGeneralRegisterToVector_PredicatedOp {
+ SVECopyGeneralRegisterToVector_PredicatedFixed = 0x0528A000,
+ SVECopyGeneralRegisterToVector_PredicatedFMask = 0xFF3FE000,
+ SVECopyGeneralRegisterToVector_PredicatedMask = 0xFF3FE000,
+ CPY_z_p_r = SVECopyGeneralRegisterToVector_PredicatedFixed
+};
+
+enum SVECopyIntImm_PredicatedOp {
+ SVECopyIntImm_PredicatedFixed = 0x05100000,
+ SVECopyIntImm_PredicatedFMask = 0xFF308000,
+ SVECopyIntImm_PredicatedMask = 0xFF308000,
+ CPY_z_p_i = SVECopyIntImm_PredicatedFixed
+};
+
+enum SVECopySIMDFPScalarRegisterToVector_PredicatedOp {
+ SVECopySIMDFPScalarRegisterToVector_PredicatedFixed = 0x05208000,
+ SVECopySIMDFPScalarRegisterToVector_PredicatedFMask = 0xFF3FE000,
+ SVECopySIMDFPScalarRegisterToVector_PredicatedMask = 0xFF3FE000,
+ CPY_z_p_v = SVECopySIMDFPScalarRegisterToVector_PredicatedFixed
+};
+
+enum SVEElementCountOp {
+ SVEElementCountFixed = 0x0420E000,
+ SVEElementCountFMask = 0xFF30F800,
+ SVEElementCountMask = 0xFFF0FC00,
+ CNTB_r_s = SVEElementCountFixed,
+ CNTH_r_s = SVEElementCountFixed | 0x00400000,
+ CNTW_r_s = SVEElementCountFixed | 0x00800000,
+ CNTD_r_s = SVEElementCountFixed | 0x00C00000
+};
+
+enum SVEExtractElementToGeneralRegisterOp {
+ SVEExtractElementToGeneralRegisterFixed = 0x0520A000,
+ SVEExtractElementToGeneralRegisterFMask = 0xFF3EE000,
+ SVEExtractElementToGeneralRegisterMask = 0xFF3FE000,
+ LASTA_r_p_z = SVEExtractElementToGeneralRegisterFixed,
+ LASTB_r_p_z = SVEExtractElementToGeneralRegisterFixed | 0x00010000
+};
+
+enum SVEExtractElementToSIMDFPScalarRegisterOp {
+ SVEExtractElementToSIMDFPScalarRegisterFixed = 0x05228000,
+ SVEExtractElementToSIMDFPScalarRegisterFMask = 0xFF3EE000,
+ SVEExtractElementToSIMDFPScalarRegisterMask = 0xFF3FE000,
+ LASTA_v_p_z = SVEExtractElementToSIMDFPScalarRegisterFixed,
+ LASTB_v_p_z = SVEExtractElementToSIMDFPScalarRegisterFixed | 0x00010000
+};
+
+enum SVEFFRInitialiseOp {
+ SVEFFRInitialiseFixed = 0x252C9000,
+ SVEFFRInitialiseFMask = 0xFF3FFFFF,
+ SVEFFRInitialiseMask = 0xFFFFFFFF,
+ SETFFR_f = SVEFFRInitialiseFixed
+};
+
+enum SVEFFRWriteFromPredicateOp {
+ SVEFFRWriteFromPredicateFixed = 0x25289000,
+ SVEFFRWriteFromPredicateFMask = 0xFF3FFE1F,
+ SVEFFRWriteFromPredicateMask = 0xFFFFFE1F,
+ WRFFR_f_p = SVEFFRWriteFromPredicateFixed
+};
+
+enum SVEFPAccumulatingReductionOp {
+ SVEFPAccumulatingReductionFixed = 0x65182000,
+ SVEFPAccumulatingReductionFMask = 0xFF38E000,
+ SVEFPAccumulatingReductionMask = 0xFF3FE000,
+ FADDA_v_p_z = SVEFPAccumulatingReductionFixed
+};
+
+enum SVEFPArithmeticUnpredicatedOp {
+ SVEFPArithmeticUnpredicatedFixed = 0x65000000,
+ SVEFPArithmeticUnpredicatedFMask = 0xFF20E000,
+ SVEFPArithmeticUnpredicatedMask = 0xFF20FC00,
+ FADD_z_zz = SVEFPArithmeticUnpredicatedFixed,
+ FSUB_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000400,
+ FMUL_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000800,
+ FTSMUL_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000C00,
+ FRECPS_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00001800,
+ FRSQRTS_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00001C00
+};
+
+enum SVEFPArithmeticWithImm_PredicatedOp {
+ SVEFPArithmeticWithImm_PredicatedFixed = 0x65188000,
+ SVEFPArithmeticWithImm_PredicatedFMask = 0xFF38E3C0,
+ SVEFPArithmeticWithImm_PredicatedMask = 0xFF3FE3C0,
+ FADD_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed,
+ FSUB_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00010000,
+ FMUL_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00020000,
+ FSUBR_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00030000,
+ FMAXNM_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00040000,
+ FMINNM_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00050000,
+ FMAX_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00060000,
+ FMIN_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00070000
+};
+
+enum SVEFPArithmetic_PredicatedOp {
+ SVEFPArithmetic_PredicatedFixed = 0x65008000,
+ SVEFPArithmetic_PredicatedFMask = 0xFF30E000,
+ SVEFPArithmetic_PredicatedMask = 0xFF3FE000,
+ FADD_z_p_zz = SVEFPArithmetic_PredicatedFixed,
+ FSUB_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00010000,
+ FMUL_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00020000,
+ FSUBR_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00030000,
+ FMAXNM_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00040000,
+ FMINNM_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00050000,
+ FMAX_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00060000,
+ FMIN_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00070000,
+ FABD_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00080000,
+ FSCALE_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00090000,
+ FMULX_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000A0000,
+ FDIVR_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000C0000,
+ FDIV_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000D0000
+};
+
+enum SVEFPCompareVectorsOp {
+ SVEFPCompareVectorsFixed = 0x65004000,
+ SVEFPCompareVectorsFMask = 0xFF204000,
+ SVEFPCompareVectorsMask = 0xFF20E010,
+ FCMGE_p_p_zz = SVEFPCompareVectorsFixed,
+ FCMGT_p_p_zz = SVEFPCompareVectorsFixed | 0x00000010,
+ FCMEQ_p_p_zz = SVEFPCompareVectorsFixed | 0x00002000,
+ FCMNE_p_p_zz = SVEFPCompareVectorsFixed | 0x00002010,
+ FCMUO_p_p_zz = SVEFPCompareVectorsFixed | 0x00008000,
+ FACGE_p_p_zz = SVEFPCompareVectorsFixed | 0x00008010,
+ FACGT_p_p_zz = SVEFPCompareVectorsFixed | 0x0000A010
+};
+
+enum SVEFPCompareWithZeroOp {
+ SVEFPCompareWithZeroFixed = 0x65102000,
+ SVEFPCompareWithZeroFMask = 0xFF38E000,
+ SVEFPCompareWithZeroMask = 0xFF3FE010,
+ FCMGE_p_p_z0 = SVEFPCompareWithZeroFixed,
+ FCMGT_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00000010,
+ FCMLT_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00010000,
+ FCMLE_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00010010,
+ FCMEQ_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00020000,
+ FCMNE_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00030000
+};
+
+enum SVEFPComplexAdditionOp {
+ SVEFPComplexAdditionFixed = 0x64008000,
+ SVEFPComplexAdditionFMask = 0xFF3EE000,
+ SVEFPComplexAdditionMask = 0xFF3EE000,
+ FCADD_z_p_zz = SVEFPComplexAdditionFixed
+};
+
+enum SVEFPComplexMulAddOp {
+ SVEFPComplexMulAddFixed = 0x64000000,
+ SVEFPComplexMulAddFMask = 0xFF208000,
+ SVEFPComplexMulAddMask = 0xFF208000,
+ FCMLA_z_p_zzz = SVEFPComplexMulAddFixed
+};
+
+enum SVEFPComplexMulAddIndexOp {
+ SVEFPComplexMulAddIndexFixed = 0x64201000,
+ SVEFPComplexMulAddIndexFMask = 0xFF20F000,
+ SVEFPComplexMulAddIndexMask = 0xFFE0F000,
+ FCMLA_z_zzzi_h = SVEFPComplexMulAddIndexFixed | 0x00800000,
+ FCMLA_z_zzzi_s = SVEFPComplexMulAddIndexFixed | 0x00C00000
+};
+
+enum SVEFPConvertPrecisionOp {
+ SVEFPConvertPrecisionFixed = 0x6508A000,
+ SVEFPConvertPrecisionFMask = 0xFF3CE000,
+ SVEFPConvertPrecisionMask = 0xFFFFE000,
+ FCVT_z_p_z_s2h = SVEFPConvertPrecisionFixed | 0x00800000,
+ FCVT_z_p_z_h2s = SVEFPConvertPrecisionFixed | 0x00810000,
+ FCVT_z_p_z_d2h = SVEFPConvertPrecisionFixed | 0x00C00000,
+ FCVT_z_p_z_h2d = SVEFPConvertPrecisionFixed | 0x00C10000,
+ FCVT_z_p_z_d2s = SVEFPConvertPrecisionFixed | 0x00C20000,
+ FCVT_z_p_z_s2d = SVEFPConvertPrecisionFixed | 0x00C30000
+};
+
+enum SVEFPConvertToIntOp {
+ SVEFPConvertToIntFixed = 0x6518A000,
+ SVEFPConvertToIntFMask = 0xFF38E000,
+ SVEFPConvertToIntMask = 0xFFFFE000,
+ FCVTZS_z_p_z_fp162h = SVEFPConvertToIntFixed | 0x00420000,
+ FCVTZU_z_p_z_fp162h = SVEFPConvertToIntFixed | 0x00430000,
+ FCVTZS_z_p_z_fp162w = SVEFPConvertToIntFixed | 0x00440000,
+ FCVTZU_z_p_z_fp162w = SVEFPConvertToIntFixed | 0x00450000,
+ FCVTZS_z_p_z_fp162x = SVEFPConvertToIntFixed | 0x00460000,
+ FCVTZU_z_p_z_fp162x = SVEFPConvertToIntFixed | 0x00470000,
+ FCVTZS_z_p_z_s2w = SVEFPConvertToIntFixed | 0x00840000,
+ FCVTZU_z_p_z_s2w = SVEFPConvertToIntFixed | 0x00850000,
+ FCVTZS_z_p_z_d2w = SVEFPConvertToIntFixed | 0x00C00000,
+ FCVTZU_z_p_z_d2w = SVEFPConvertToIntFixed | 0x00C10000,
+ FCVTZS_z_p_z_s2x = SVEFPConvertToIntFixed | 0x00C40000,
+ FCVTZU_z_p_z_s2x = SVEFPConvertToIntFixed | 0x00C50000,
+ FCVTZS_z_p_z_d2x = SVEFPConvertToIntFixed | 0x00C60000,
+ FCVTZU_z_p_z_d2x = SVEFPConvertToIntFixed | 0x00C70000
+};
+
+enum SVEFPExponentialAcceleratorOp {
+ SVEFPExponentialAcceleratorFixed = 0x0420B800,
+ SVEFPExponentialAcceleratorFMask = 0xFF20FC00,
+ SVEFPExponentialAcceleratorMask = 0xFF3FFC00,
+ FEXPA_z_z = SVEFPExponentialAcceleratorFixed
+};
+
+enum SVEFPFastReductionOp {
+ SVEFPFastReductionFixed = 0x65002000,
+ SVEFPFastReductionFMask = 0xFF38E000,
+ SVEFPFastReductionMask = 0xFF3FE000,
+ FADDV_v_p_z = SVEFPFastReductionFixed,
+ FMAXNMV_v_p_z = SVEFPFastReductionFixed | 0x00040000,
+ FMINNMV_v_p_z = SVEFPFastReductionFixed | 0x00050000,
+ FMAXV_v_p_z = SVEFPFastReductionFixed | 0x00060000,
+ FMINV_v_p_z = SVEFPFastReductionFixed | 0x00070000
+};
+
+enum SVEFPMulAddOp {
+ SVEFPMulAddFixed = 0x65200000,
+ SVEFPMulAddFMask = 0xFF200000,
+ SVEFPMulAddMask = 0xFF20E000,
+ FMLA_z_p_zzz = SVEFPMulAddFixed,
+ FMLS_z_p_zzz = SVEFPMulAddFixed | 0x00002000,
+ FNMLA_z_p_zzz = SVEFPMulAddFixed | 0x00004000,
+ FNMLS_z_p_zzz = SVEFPMulAddFixed | 0x00006000,
+ FMAD_z_p_zzz = SVEFPMulAddFixed | 0x00008000,
+ FMSB_z_p_zzz = SVEFPMulAddFixed | 0x0000A000,
+ FNMAD_z_p_zzz = SVEFPMulAddFixed | 0x0000C000,
+ FNMSB_z_p_zzz = SVEFPMulAddFixed | 0x0000E000
+};
+
+enum SVEFPMulAddIndexOp {
+ SVEFPMulAddIndexFixed = 0x64200000,
+ SVEFPMulAddIndexFMask = 0xFF20F800,
+ SVEFPMulAddIndexMask = 0xFFE0FC00,
+ FMLA_z_zzzi_h = SVEFPMulAddIndexFixed,
+ FMLA_z_zzzi_h_i3h = FMLA_z_zzzi_h | 0x00400000,
+ FMLS_z_zzzi_h = SVEFPMulAddIndexFixed | 0x00000400,
+ FMLS_z_zzzi_h_i3h = FMLS_z_zzzi_h | 0x00400000,
+ FMLA_z_zzzi_s = SVEFPMulAddIndexFixed | 0x00800000,
+ FMLS_z_zzzi_s = SVEFPMulAddIndexFixed | 0x00800400,
+ FMLA_z_zzzi_d = SVEFPMulAddIndexFixed | 0x00C00000,
+ FMLS_z_zzzi_d = SVEFPMulAddIndexFixed | 0x00C00400
+};
+
+enum SVEFPMulIndexOp {
+ SVEFPMulIndexFixed = 0x64202000,
+ SVEFPMulIndexFMask = 0xFF20FC00,
+ SVEFPMulIndexMask = 0xFFE0FC00,
+ FMUL_z_zzi_h = SVEFPMulIndexFixed,
+ FMUL_z_zzi_h_i3h = FMUL_z_zzi_h | 0x00400000,
+ FMUL_z_zzi_s = SVEFPMulIndexFixed | 0x00800000,
+ FMUL_z_zzi_d = SVEFPMulIndexFixed | 0x00C00000
+};
+
+enum SVEFPRoundToIntegralValueOp {
+ SVEFPRoundToIntegralValueFixed = 0x6500A000,
+ SVEFPRoundToIntegralValueFMask = 0xFF38E000,
+ SVEFPRoundToIntegralValueMask = 0xFF3FE000,
+ FRINTN_z_p_z = SVEFPRoundToIntegralValueFixed,
+ FRINTP_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00010000,
+ FRINTM_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00020000,
+ FRINTZ_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00030000,
+ FRINTA_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00040000,
+ FRINTX_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00060000,
+ FRINTI_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00070000
+};
+
+enum SVEFPTrigMulAddCoefficientOp {
+ SVEFPTrigMulAddCoefficientFixed = 0x65108000,
+ SVEFPTrigMulAddCoefficientFMask = 0xFF38FC00,
+ SVEFPTrigMulAddCoefficientMask = 0xFF38FC00,
+ FTMAD_z_zzi = SVEFPTrigMulAddCoefficientFixed
+};
+
+enum SVEFPTrigSelectCoefficientOp {
+ SVEFPTrigSelectCoefficientFixed = 0x0420B000,
+ SVEFPTrigSelectCoefficientFMask = 0xFF20F800,
+ SVEFPTrigSelectCoefficientMask = 0xFF20FC00,
+ FTSSEL_z_zz = SVEFPTrigSelectCoefficientFixed
+};
+
+enum SVEFPUnaryOpOp {
+ SVEFPUnaryOpFixed = 0x650CA000,
+ SVEFPUnaryOpFMask = 0xFF3CE000,
+ SVEFPUnaryOpMask = 0xFF3FE000,
+ FRECPX_z_p_z = SVEFPUnaryOpFixed,
+ FSQRT_z_p_z = SVEFPUnaryOpFixed | 0x00010000
+};
+
+enum SVEFPUnaryOpUnpredicatedOp {
+ SVEFPUnaryOpUnpredicatedFixed = 0x65083000,
+ SVEFPUnaryOpUnpredicatedFMask = 0xFF38F000,
+ SVEFPUnaryOpUnpredicatedMask = 0xFF3FFC00,
+ FRECPE_z_z = SVEFPUnaryOpUnpredicatedFixed | 0x00060000,
+ FRSQRTE_z_z = SVEFPUnaryOpUnpredicatedFixed | 0x00070000
+};
+
+enum SVEIncDecByPredicateCountOp {
+ SVEIncDecByPredicateCountFixed = 0x25288000,
+ SVEIncDecByPredicateCountFMask = 0xFF38F000,
+ SVEIncDecByPredicateCountMask = 0xFF3FFE00,
+ SQINCP_z_p_z = SVEIncDecByPredicateCountFixed,
+ SQINCP_r_p_r_sx = SVEIncDecByPredicateCountFixed | 0x00000800,
+ SQINCP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00000C00,
+ UQINCP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00010000,
+ UQINCP_r_p_r_uw = SVEIncDecByPredicateCountFixed | 0x00010800,
+ UQINCP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00010C00,
+ SQDECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00020000,
+ SQDECP_r_p_r_sx = SVEIncDecByPredicateCountFixed | 0x00020800,
+ SQDECP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00020C00,
+ UQDECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00030000,
+ UQDECP_r_p_r_uw = SVEIncDecByPredicateCountFixed | 0x00030800,
+ UQDECP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00030C00,
+ INCP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00040000,
+ INCP_r_p_r = SVEIncDecByPredicateCountFixed | 0x00040800,
+ DECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00050000,
+ DECP_r_p_r = SVEIncDecByPredicateCountFixed | 0x00050800
+};
+
+enum SVEIncDecRegisterByElementCountOp {
+ SVEIncDecRegisterByElementCountFixed = 0x0430E000,
+ SVEIncDecRegisterByElementCountFMask = 0xFF30F800,
+ SVEIncDecRegisterByElementCountMask = 0xFFF0FC00,
+ INCB_r_rs = SVEIncDecRegisterByElementCountFixed,
+ DECB_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00000400,
+ INCH_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00400000,
+ DECH_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00400400,
+ INCW_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00800000,
+ DECW_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00800400,
+ INCD_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00C00000,
+ DECD_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00C00400
+};
+
+enum SVEIncDecVectorByElementCountOp {
+ SVEIncDecVectorByElementCountFixed = 0x0430C000,
+ SVEIncDecVectorByElementCountFMask = 0xFF30F800,
+ SVEIncDecVectorByElementCountMask = 0xFFF0FC00,
+ INCH_z_zs = SVEIncDecVectorByElementCountFixed | 0x00400000,
+ DECH_z_zs = SVEIncDecVectorByElementCountFixed | 0x00400400,
+ INCW_z_zs = SVEIncDecVectorByElementCountFixed | 0x00800000,
+ DECW_z_zs = SVEIncDecVectorByElementCountFixed | 0x00800400,
+ INCD_z_zs = SVEIncDecVectorByElementCountFixed | 0x00C00000,
+ DECD_z_zs = SVEIncDecVectorByElementCountFixed | 0x00C00400
+};
+
+enum SVEIndexGenerationOp {
+ SVEIndexGenerationFixed = 0x04204000,
+ SVEIndexGenerationFMask = 0xFF20F000,
+ SVEIndexGenerationMask = 0xFF20FC00,
+ INDEX_z_ii = SVEIndexGenerationFixed,
+ INDEX_z_ri = SVEIndexGenerationFixed | 0x00000400,
+ INDEX_z_ir = SVEIndexGenerationFixed | 0x00000800,
+ INDEX_z_rr = SVEIndexGenerationFixed | 0x00000C00
+};
+
+enum SVEInsertGeneralRegisterOp {
+ SVEInsertGeneralRegisterFixed = 0x05243800,
+ SVEInsertGeneralRegisterFMask = 0xFF3FFC00,
+ SVEInsertGeneralRegisterMask = 0xFF3FFC00,
+ INSR_z_r = SVEInsertGeneralRegisterFixed
+};
+
+enum SVEInsertSIMDFPScalarRegisterOp {
+ SVEInsertSIMDFPScalarRegisterFixed = 0x05343800,
+ SVEInsertSIMDFPScalarRegisterFMask = 0xFF3FFC00,
+ SVEInsertSIMDFPScalarRegisterMask = 0xFF3FFC00,
+ INSR_z_v = SVEInsertSIMDFPScalarRegisterFixed
+};
+
+enum SVEIntAddSubtractImm_UnpredicatedOp {
+ SVEIntAddSubtractImm_UnpredicatedFixed = 0x2520C000,
+ SVEIntAddSubtractImm_UnpredicatedFMask = 0xFF38C000,
+ SVEIntAddSubtractImm_UnpredicatedMask = 0xFF3FC000,
+ ADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed,
+ SUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00010000,
+ SUBR_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00030000,
+ SQADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00040000,
+ UQADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00050000,
+ SQSUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00060000,
+ UQSUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00070000
+};
+
+enum SVEIntAddSubtractVectors_PredicatedOp {
+ SVEIntAddSubtractVectors_PredicatedFixed = 0x04000000,
+ SVEIntAddSubtractVectors_PredicatedFMask = 0xFF38E000,
+ SVEIntAddSubtractVectors_PredicatedMask = 0xFF3FE000,
+ ADD_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed,
+ SUB_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed | 0x00010000,
+ SUBR_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed | 0x00030000
+};
+
+enum SVEIntArithmeticUnpredicatedOp {
+ SVEIntArithmeticUnpredicatedFixed = 0x04200000,
+ SVEIntArithmeticUnpredicatedFMask = 0xFF20E000,
+ SVEIntArithmeticUnpredicatedMask = 0xFF20FC00,
+ ADD_z_zz = SVEIntArithmeticUnpredicatedFixed,
+ SUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00000400,
+ SQADD_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001000,
+ UQADD_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001400,
+ SQSUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001800,
+ UQSUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001C00
+};
+
+enum SVEIntCompareScalarCountAndLimitOp {
+ SVEIntCompareScalarCountAndLimitFixed = 0x25200000,
+ SVEIntCompareScalarCountAndLimitFMask = 0xFF20E000,
+ SVEIntCompareScalarCountAndLimitMask = 0xFF20EC10,
+ WHILELT_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000400,
+ WHILELE_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000410,
+ WHILELO_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000C00,
+ WHILELS_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000C10
+};
+
+enum SVEIntCompareSignedImmOp {
+ SVEIntCompareSignedImmFixed = 0x25000000,
+ SVEIntCompareSignedImmFMask = 0xFF204000,
+ SVEIntCompareSignedImmMask = 0xFF20E010,
+ CMPGE_p_p_zi = SVEIntCompareSignedImmFixed,
+ CMPGT_p_p_zi = SVEIntCompareSignedImmFixed | 0x00000010,
+ CMPLT_p_p_zi = SVEIntCompareSignedImmFixed | 0x00002000,
+ CMPLE_p_p_zi = SVEIntCompareSignedImmFixed | 0x00002010,
+ CMPEQ_p_p_zi = SVEIntCompareSignedImmFixed | 0x00008000,
+ CMPNE_p_p_zi = SVEIntCompareSignedImmFixed | 0x00008010
+};
+
+enum SVEIntCompareUnsignedImmOp {
+ SVEIntCompareUnsignedImmFixed = 0x24200000,
+ SVEIntCompareUnsignedImmFMask = 0xFF200000,
+ SVEIntCompareUnsignedImmMask = 0xFF202010,
+ CMPHS_p_p_zi = SVEIntCompareUnsignedImmFixed,
+ CMPHI_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00000010,
+ CMPLO_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00002000,
+ CMPLS_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00002010
+};
+
+enum SVEIntCompareVectorsOp {
+ SVEIntCompareVectorsFixed = 0x24000000,
+ SVEIntCompareVectorsFMask = 0xFF200000,
+ SVEIntCompareVectorsMask = 0xFF20E010,
+ CMPHS_p_p_zz = SVEIntCompareVectorsFixed,
+ CMPHI_p_p_zz = SVEIntCompareVectorsFixed | 0x00000010,
+ CMPEQ_p_p_zw = SVEIntCompareVectorsFixed | 0x00002000,
+ CMPNE_p_p_zw = SVEIntCompareVectorsFixed | 0x00002010,
+ CMPGE_p_p_zw = SVEIntCompareVectorsFixed | 0x00004000,
+ CMPGT_p_p_zw = SVEIntCompareVectorsFixed | 0x00004010,
+ CMPLT_p_p_zw = SVEIntCompareVectorsFixed | 0x00006000,
+ CMPLE_p_p_zw = SVEIntCompareVectorsFixed | 0x00006010,
+ CMPGE_p_p_zz = SVEIntCompareVectorsFixed | 0x00008000,
+ CMPGT_p_p_zz = SVEIntCompareVectorsFixed | 0x00008010,
+ CMPEQ_p_p_zz = SVEIntCompareVectorsFixed | 0x0000A000,
+ CMPNE_p_p_zz = SVEIntCompareVectorsFixed | 0x0000A010,
+ CMPHS_p_p_zw = SVEIntCompareVectorsFixed | 0x0000C000,
+ CMPHI_p_p_zw = SVEIntCompareVectorsFixed | 0x0000C010,
+ CMPLO_p_p_zw = SVEIntCompareVectorsFixed | 0x0000E000,
+ CMPLS_p_p_zw = SVEIntCompareVectorsFixed | 0x0000E010
+};
+
+enum SVEIntConvertToFPOp {
+ SVEIntConvertToFPFixed = 0x6510A000,
+ SVEIntConvertToFPFMask = 0xFF38E000,
+ SVEIntConvertToFPMask = 0xFFFFE000,
+ SCVTF_z_p_z_h2fp16 = SVEIntConvertToFPFixed | 0x00420000,
+ UCVTF_z_p_z_h2fp16 = SVEIntConvertToFPFixed | 0x00430000,
+ SCVTF_z_p_z_w2fp16 = SVEIntConvertToFPFixed | 0x00440000,
+ UCVTF_z_p_z_w2fp16 = SVEIntConvertToFPFixed | 0x00450000,
+ SCVTF_z_p_z_x2fp16 = SVEIntConvertToFPFixed | 0x00460000,
+ UCVTF_z_p_z_x2fp16 = SVEIntConvertToFPFixed | 0x00470000,
+ SCVTF_z_p_z_w2s = SVEIntConvertToFPFixed | 0x00840000,
+ UCVTF_z_p_z_w2s = SVEIntConvertToFPFixed | 0x00850000,
+ SCVTF_z_p_z_w2d = SVEIntConvertToFPFixed | 0x00C00000,
+ UCVTF_z_p_z_w2d = SVEIntConvertToFPFixed | 0x00C10000,
+ SCVTF_z_p_z_x2s = SVEIntConvertToFPFixed | 0x00C40000,
+ UCVTF_z_p_z_x2s = SVEIntConvertToFPFixed | 0x00C50000,
+ SCVTF_z_p_z_x2d = SVEIntConvertToFPFixed | 0x00C60000,
+ UCVTF_z_p_z_x2d = SVEIntConvertToFPFixed | 0x00C70000
+};
+
+enum SVEIntDivideVectors_PredicatedOp {
+ SVEIntDivideVectors_PredicatedFixed = 0x04140000,
+ SVEIntDivideVectors_PredicatedFMask = 0xFF3CE000,
+ SVEIntDivideVectors_PredicatedMask = 0xFF3FE000,
+ SDIV_z_p_zz = SVEIntDivideVectors_PredicatedFixed,
+ UDIV_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00010000,
+ SDIVR_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00020000,
+ UDIVR_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00030000
+};
+
+enum SVEIntMinMaxDifference_PredicatedOp {
+ SVEIntMinMaxDifference_PredicatedFixed = 0x04080000,
+ SVEIntMinMaxDifference_PredicatedFMask = 0xFF38E000,
+ SVEIntMinMaxDifference_PredicatedMask = 0xFF3FE000,
+ SMAX_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed,
+ UMAX_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00010000,
+ SMIN_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00020000,
+ UMIN_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00030000,
+ SABD_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00040000,
+ UABD_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00050000
+};
+
+enum SVEIntMinMaxImm_UnpredicatedOp {
+ SVEIntMinMaxImm_UnpredicatedFixed = 0x2528C000,
+ SVEIntMinMaxImm_UnpredicatedFMask = 0xFF38C000,
+ SVEIntMinMaxImm_UnpredicatedMask = 0xFF3FE000,
+ SMAX_z_zi = SVEIntMinMaxImm_UnpredicatedFixed,
+ UMAX_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00010000,
+ SMIN_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00020000,
+ UMIN_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00030000
+};
+
+enum SVEIntMulAddPredicatedOp {
+ SVEIntMulAddPredicatedFixed = 0x04004000,
+ SVEIntMulAddPredicatedFMask = 0xFF204000,
+ SVEIntMulAddPredicatedMask = 0xFF20E000,
+ MLA_z_p_zzz = SVEIntMulAddPredicatedFixed,
+ MLS_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x00002000,
+ MAD_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x00008000,
+ MSB_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x0000A000
+};
+
+enum SVEIntMulAddUnpredicatedOp {
+ SVEIntMulAddUnpredicatedFixed = 0x44000000,
+ SVEIntMulAddUnpredicatedFMask = 0xFF208000,
+ SVEIntMulAddUnpredicatedMask = 0xFF20FC00,
+ SDOT_z_zzz = SVEIntMulAddUnpredicatedFixed,
+ UDOT_z_zzz = SVEIntMulAddUnpredicatedFixed | 0x00000400
+};
+
+enum SVEIntMulImm_UnpredicatedOp {
+ SVEIntMulImm_UnpredicatedFixed = 0x2530C000,
+ SVEIntMulImm_UnpredicatedFMask = 0xFF38C000,
+ SVEIntMulImm_UnpredicatedMask = 0xFF3FE000,
+ MUL_z_zi = SVEIntMulImm_UnpredicatedFixed
+};
+
+enum SVEIntMulVectors_PredicatedOp {
+ SVEIntMulVectors_PredicatedFixed = 0x04100000,
+ SVEIntMulVectors_PredicatedFMask = 0xFF3CE000,
+ SVEIntMulVectors_PredicatedMask = 0xFF3FE000,
+ MUL_z_p_zz = SVEIntMulVectors_PredicatedFixed,
+ SMULH_z_p_zz = SVEIntMulVectors_PredicatedFixed | 0x00020000,
+ UMULH_z_p_zz = SVEIntMulVectors_PredicatedFixed | 0x00030000
+};
+
+enum SVEMovprfxOp {
+ SVEMovprfxFixed = 0x04002000,
+ SVEMovprfxFMask = 0xFF20E000,
+ SVEMovprfxMask = 0xFF3EE000,
+ MOVPRFX_z_p_z = SVEMovprfxFixed | 0x00100000
+};
+
+enum SVEIntReductionOp {
+ SVEIntReductionFixed = 0x04002000,
+ SVEIntReductionFMask = 0xFF20E000,
+ SVEIntReductionMask = 0xFF3FE000,
+ SADDV_r_p_z = SVEIntReductionFixed,
+ UADDV_r_p_z = SVEIntReductionFixed | 0x00010000,
+ SMAXV_r_p_z = SVEIntReductionFixed | 0x00080000,
+ UMAXV_r_p_z = SVEIntReductionFixed | 0x00090000,
+ SMINV_r_p_z = SVEIntReductionFixed | 0x000A0000,
+ UMINV_r_p_z = SVEIntReductionFixed | 0x000B0000
+};
+
+enum SVEIntReductionLogicalOp {
+ SVEIntReductionLogicalFixed = 0x04182000,
+ SVEIntReductionLogicalFMask = 0xFF38E000,
+ SVEIntReductionLogicalMask = 0xFF3FE000,
+ ORV_r_p_z = SVEIntReductionLogicalFixed | 0x00180000,
+ EORV_r_p_z = SVEIntReductionLogicalFixed | 0x00190000,
+ ANDV_r_p_z = SVEIntReductionLogicalFixed | 0x001A0000
+};
+
+enum SVEIntUnaryArithmeticPredicatedOp {
+ SVEIntUnaryArithmeticPredicatedFixed = 0x0400A000,
+ SVEIntUnaryArithmeticPredicatedFMask = 0xFF20E000,
+ SVEIntUnaryArithmeticPredicatedMask = 0xFF3FE000,
+ SXTB_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00100000,
+ UXTB_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00110000,
+ SXTH_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00120000,
+ UXTH_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00130000,
+ SXTW_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00140000,
+ UXTW_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00150000,
+ ABS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00160000,
+ NEG_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00170000,
+ CLS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00180000,
+ CLZ_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00190000,
+ CNT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001A0000,
+ CNOT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001B0000,
+ FABS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001C0000,
+ FNEG_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001D0000,
+ NOT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001E0000
+};
+
+enum SVELoadAndBroadcastElementOp {
+ SVELoadAndBroadcastElementFixed = 0x84408000,
+ SVELoadAndBroadcastElementFMask = 0xFE408000,
+ SVELoadAndBroadcastElementMask = 0xFFC0E000,
+ LD1RB_z_p_bi_u8 = SVELoadAndBroadcastElementFixed,
+ LD1RB_z_p_bi_u16 = SVELoadAndBroadcastElementFixed | 0x00002000,
+ LD1RB_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x00004000,
+ LD1RB_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x00006000,
+ LD1RSW_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x00800000,
+ LD1RH_z_p_bi_u16 = SVELoadAndBroadcastElementFixed | 0x00802000,
+ LD1RH_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x00804000,
+ LD1RH_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x00806000,
+ LD1RSH_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x01000000,
+ LD1RSH_z_p_bi_s32 = SVELoadAndBroadcastElementFixed | 0x01002000,
+ LD1RW_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x01004000,
+ LD1RW_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x01006000,
+ LD1RSB_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x01800000,
+ LD1RSB_z_p_bi_s32 = SVELoadAndBroadcastElementFixed | 0x01802000,
+ LD1RSB_z_p_bi_s16 = SVELoadAndBroadcastElementFixed | 0x01804000,
+ LD1RD_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x01806000
+};
+
+enum SVELoadAndBroadcastQuadword_ScalarPlusImmOp {
+ SVELoadAndBroadcastQuadword_ScalarPlusImmFixed = 0xA4002000,
+ SVELoadAndBroadcastQuadword_ScalarPlusImmFMask = 0xFE10E000,
+ SVELoadAndBroadcastQuadword_ScalarPlusImmMask = 0xFFF0E000,
+ LD1RQB_z_p_bi_u8 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed,
+ LD1RQH_z_p_bi_u16 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x00800000,
+ LD1RQW_z_p_bi_u32 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x01000000,
+ LD1RQD_z_p_bi_u64 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVELoadAndBroadcastQuadword_ScalarPlusScalarOp {
+ SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed = 0xA4000000,
+ SVELoadAndBroadcastQuadword_ScalarPlusScalarFMask = 0xFE00E000,
+ SVELoadAndBroadcastQuadword_ScalarPlusScalarMask = 0xFFE0E000,
+ LD1RQB_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed,
+ LD1RQH_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x00800000,
+ LD1RQW_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x01000000,
+ LD1RQD_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVELoadMultipleStructures_ScalarPlusImmOp {
+ SVELoadMultipleStructures_ScalarPlusImmFixed = 0xA400E000,
+ SVELoadMultipleStructures_ScalarPlusImmFMask = 0xFE10E000,
+ SVELoadMultipleStructures_ScalarPlusImmMask = 0xFFF0E000,
+ LD2B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00200000,
+ LD3B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00400000,
+ LD4B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00600000,
+ LD2H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00A00000,
+ LD3H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00C00000,
+ LD4H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00E00000,
+ LD2W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01200000,
+ LD3W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01400000,
+ LD4W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01600000,
+ LD2D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01A00000,
+ LD3D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01C00000,
+ LD4D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVELoadMultipleStructures_ScalarPlusScalarOp {
+ SVELoadMultipleStructures_ScalarPlusScalarFixed = 0xA400C000,
+ SVELoadMultipleStructures_ScalarPlusScalarFMask = 0xFE00E000,
+ SVELoadMultipleStructures_ScalarPlusScalarMask = 0xFFE0E000,
+ LD2B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00200000,
+ LD3B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00400000,
+ LD4B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00600000,
+ LD2H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00A00000,
+ LD3H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00C00000,
+ LD4H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00E00000,
+ LD2W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01200000,
+ LD3W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01400000,
+ LD4W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01600000,
+ LD2D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01A00000,
+ LD3D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01C00000,
+ LD4D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVELoadPredicateRegisterOp {
+ SVELoadPredicateRegisterFixed = 0x85800000,
+ SVELoadPredicateRegisterFMask = 0xFFC0E010,
+ SVELoadPredicateRegisterMask = 0xFFC0E010,
+ LDR_p_bi = SVELoadPredicateRegisterFixed
+};
+
+enum SVELoadVectorRegisterOp {
+ SVELoadVectorRegisterFixed = 0x85804000,
+ SVELoadVectorRegisterFMask = 0xFFC0E000,
+ SVELoadVectorRegisterMask = 0xFFC0E000,
+ LDR_z_bi = SVELoadVectorRegisterFixed
+};
+
+enum SVEMulIndexOp {
+ SVEMulIndexFixed = 0x44200000,
+ SVEMulIndexFMask = 0xFF200000,
+ SVEMulIndexMask = 0xFFE0FC00,
+ SDOT_z_zzzi_s = SVEMulIndexFixed | 0x00800000,
+ UDOT_z_zzzi_s = SVEMulIndexFixed | 0x00800400,
+ SDOT_z_zzzi_d = SVEMulIndexFixed | 0x00C00000,
+ UDOT_z_zzzi_d = SVEMulIndexFixed | 0x00C00400
+};
+
+enum SVEPartitionBreakConditionOp {
+ SVEPartitionBreakConditionFixed = 0x25104000,
+ SVEPartitionBreakConditionFMask = 0xFF3FC200,
+ SVEPartitionBreakConditionMask = 0xFFFFC200,
+ BRKA_p_p_p = SVEPartitionBreakConditionFixed,
+ BRKAS_p_p_p_z = SVEPartitionBreakConditionFixed | 0x00400000,
+ BRKB_p_p_p = SVEPartitionBreakConditionFixed | 0x00800000,
+ BRKBS_p_p_p_z = SVEPartitionBreakConditionFixed | 0x00C00000
+};
+
+enum SVEPermutePredicateElementsOp {
+ SVEPermutePredicateElementsFixed = 0x05204000,
+ SVEPermutePredicateElementsFMask = 0xFF30E210,
+ SVEPermutePredicateElementsMask = 0xFF30FE10,
+ ZIP1_p_pp = SVEPermutePredicateElementsFixed,
+ ZIP2_p_pp = SVEPermutePredicateElementsFixed | 0x00000400,
+ UZP1_p_pp = SVEPermutePredicateElementsFixed | 0x00000800,
+ UZP2_p_pp = SVEPermutePredicateElementsFixed | 0x00000C00,
+ TRN1_p_pp = SVEPermutePredicateElementsFixed | 0x00001000,
+ TRN2_p_pp = SVEPermutePredicateElementsFixed | 0x00001400
+};
+
+enum SVEPermuteVectorExtractOp {
+ SVEPermuteVectorExtractFixed = 0x05200000,
+ SVEPermuteVectorExtractFMask = 0xFF20E000,
+ SVEPermuteVectorExtractMask = 0xFFE0E000,
+ EXT_z_zi_des = SVEPermuteVectorExtractFixed
+};
+
+enum SVEPermuteVectorInterleavingOp {
+ SVEPermuteVectorInterleavingFixed = 0x05206000,
+ SVEPermuteVectorInterleavingFMask = 0xFF20E000,
+ SVEPermuteVectorInterleavingMask = 0xFF20FC00,
+ ZIP1_z_zz = SVEPermuteVectorInterleavingFixed,
+ ZIP2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000400,
+ UZP1_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000800,
+ UZP2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000C00,
+ TRN1_z_zz = SVEPermuteVectorInterleavingFixed | 0x00001000,
+ TRN2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00001400
+};
+
+enum SVEPredicateCountOp {
+ SVEPredicateCountFixed = 0x25208000,
+ SVEPredicateCountFMask = 0xFF38C000,
+ SVEPredicateCountMask = 0xFF3FC200,
+ CNTP_r_p_p = SVEPredicateCountFixed
+};
+
+enum SVEPredicateFirstActiveOp {
+ SVEPredicateFirstActiveFixed = 0x2518C000,
+ SVEPredicateFirstActiveFMask = 0xFF3FFE10,
+ SVEPredicateFirstActiveMask = 0xFFFFFE10,
+ PFIRST_p_p_p = SVEPredicateFirstActiveFixed | 0x00400000
+};
+
+enum SVEPredicateInitializeOp {
+ SVEPredicateInitializeFixed = 0x2518E000,
+ SVEPredicateInitializeFMask = 0xFF3EFC10,
+ SVEPredicateInitializeMask = 0xFF3FFC10,
+ SVEPredicateInitializeSetFlagsBit = 0x00010000,
+ PTRUE_p_s = SVEPredicateInitializeFixed | 0x00000000,
+ PTRUES_p_s = SVEPredicateInitializeFixed | SVEPredicateInitializeSetFlagsBit
+};
+
+enum SVEPredicateLogicalOp {
+ SVEPredicateLogicalFixed = 0x25004000,
+ SVEPredicateLogicalFMask = 0xFF30C000,
+ SVEPredicateLogicalMask = 0xFFF0C210,
+ SVEPredicateLogicalSetFlagsBit = 0x00400000,
+ AND_p_p_pp_z = SVEPredicateLogicalFixed,
+ ANDS_p_p_pp_z = AND_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+ BIC_p_p_pp_z = SVEPredicateLogicalFixed | 0x00000010,
+ BICS_p_p_pp_z = BIC_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+ EOR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00000200,
+ EORS_p_p_pp_z = EOR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+ ORR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800000,
+ ORRS_p_p_pp_z = ORR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+ ORN_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800010,
+ ORNS_p_p_pp_z = ORN_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+ NAND_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800210,
+ NANDS_p_p_pp_z = NAND_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+ NOR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800200,
+ NORS_p_p_pp_z = NOR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+ SEL_p_p_pp = SVEPredicateLogicalFixed | 0x00000210
+};
+
+enum SVEPredicateNextActiveOp {
+ SVEPredicateNextActiveFixed = 0x2519C400,
+ SVEPredicateNextActiveFMask = 0xFF3FFE10,
+ SVEPredicateNextActiveMask = 0xFF3FFE10,
+ PNEXT_p_p_p = SVEPredicateNextActiveFixed
+};
+
+enum SVEPredicateReadFromFFR_PredicatedOp {
+ SVEPredicateReadFromFFR_PredicatedFixed = 0x2518F000,
+ SVEPredicateReadFromFFR_PredicatedFMask = 0xFF3FFE10,
+ SVEPredicateReadFromFFR_PredicatedMask = 0xFFFFFE10,
+ RDFFR_p_p_f = SVEPredicateReadFromFFR_PredicatedFixed,
+ RDFFRS_p_p_f = SVEPredicateReadFromFFR_PredicatedFixed | 0x00400000
+};
+
+enum SVEPredicateReadFromFFR_UnpredicatedOp {
+ SVEPredicateReadFromFFR_UnpredicatedFixed = 0x2519F000,
+ SVEPredicateReadFromFFR_UnpredicatedFMask = 0xFF3FFFF0,
+ SVEPredicateReadFromFFR_UnpredicatedMask = 0xFFFFFFF0,
+ RDFFR_p_f = SVEPredicateReadFromFFR_UnpredicatedFixed
+};
+
+enum SVEPredicateTestOp {
+ SVEPredicateTestFixed = 0x2510C000,
+ SVEPredicateTestFMask = 0xFF3FC210,
+ SVEPredicateTestMask = 0xFFFFC21F,
+ PTEST_p_p = SVEPredicateTestFixed | 0x00400000
+};
+
+enum SVEPredicateZeroOp {
+ SVEPredicateZeroFixed = 0x2518E400,
+ SVEPredicateZeroFMask = 0xFF3FFFF0,
+ SVEPredicateZeroMask = 0xFFFFFFF0,
+ PFALSE_p = SVEPredicateZeroFixed
+};
+
+enum SVEPropagateBreakOp {
+ SVEPropagateBreakFixed = 0x2500C000,
+ SVEPropagateBreakFMask = 0xFF30C000,
+ SVEPropagateBreakMask = 0xFFF0C210,
+ BRKPA_p_p_pp = SVEPropagateBreakFixed,
+ BRKPB_p_p_pp = SVEPropagateBreakFixed | 0x00000010,
+ BRKPAS_p_p_pp = SVEPropagateBreakFixed | 0x00400000,
+ BRKPBS_p_p_pp = SVEPropagateBreakFixed | 0x00400010
+};
+
+enum SVEPropagateBreakToNextPartitionOp {
+ SVEPropagateBreakToNextPartitionFixed = 0x25184000,
+ SVEPropagateBreakToNextPartitionFMask = 0xFFBFC210,
+ SVEPropagateBreakToNextPartitionMask = 0xFFFFC210,
+ BRKN_p_p_pp = SVEPropagateBreakToNextPartitionFixed,
+ BRKNS_p_p_pp = SVEPropagateBreakToNextPartitionFixed | 0x00400000
+};
+
+enum SVEReversePredicateElementsOp {
+ SVEReversePredicateElementsFixed = 0x05344000,
+ SVEReversePredicateElementsFMask = 0xFF3FFE10,
+ SVEReversePredicateElementsMask = 0xFF3FFE10,
+ REV_p_p = SVEReversePredicateElementsFixed
+};
+
+enum SVEReverseVectorElementsOp {
+ SVEReverseVectorElementsFixed = 0x05383800,
+ SVEReverseVectorElementsFMask = 0xFF3FFC00,
+ SVEReverseVectorElementsMask = 0xFF3FFC00,
+ REV_z_z = SVEReverseVectorElementsFixed
+};
+
+enum SVEReverseWithinElementsOp {
+ SVEReverseWithinElementsFixed = 0x05248000,
+ SVEReverseWithinElementsFMask = 0xFF3CE000,
+ SVEReverseWithinElementsMask = 0xFF3FE000,
+ REVB_z_z = SVEReverseWithinElementsFixed,
+ REVH_z_z = SVEReverseWithinElementsFixed | 0x00010000,
+ REVW_z_z = SVEReverseWithinElementsFixed | 0x00020000,
+ RBIT_z_p_z = SVEReverseWithinElementsFixed | 0x00030000
+};
+
+enum SVESaturatingIncDecRegisterByElementCountOp {
+ SVESaturatingIncDecRegisterByElementCountFixed = 0x0420F000,
+ SVESaturatingIncDecRegisterByElementCountFMask = 0xFF20F000,
+ SVESaturatingIncDecRegisterByElementCountMask = 0xFFF0FC00,
+ SQINCB_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed,
+ UQINCB_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000400,
+ SQDECB_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000800,
+ UQDECB_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000C00,
+ SQINCB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100000,
+ UQINCB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100400,
+ SQDECB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100800,
+ UQDECB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100C00,
+ SQINCH_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400000,
+ UQINCH_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400400,
+ SQDECH_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400800,
+ UQDECH_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400C00,
+ SQINCH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500000,
+ UQINCH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500400,
+ SQDECH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500800,
+ UQDECH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500C00,
+ SQINCW_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800000,
+ UQINCW_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800400,
+ SQDECW_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800800,
+ UQDECW_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800C00,
+ SQINCW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900000,
+ UQINCW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900400,
+ SQDECW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900800,
+ UQDECW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900C00,
+ SQINCD_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00000,
+ UQINCD_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00400,
+ SQDECD_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00800,
+ UQDECD_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00C00,
+ SQINCD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00000,
+ UQINCD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00400,
+ SQDECD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00800,
+ UQDECD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00C00
+};
+
+enum SVESaturatingIncDecVectorByElementCountOp {
+ SVESaturatingIncDecVectorByElementCountFixed = 0x0420C000,
+ SVESaturatingIncDecVectorByElementCountFMask = 0xFF30F000,
+ SVESaturatingIncDecVectorByElementCountMask = 0xFFF0FC00,
+ SQINCH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400000,
+ UQINCH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400400,
+ SQDECH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400800,
+ UQDECH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400C00,
+ SQINCW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800000,
+ UQINCW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800400,
+ SQDECW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800800,
+ UQDECW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800C00,
+ SQINCD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00000,
+ UQINCD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00400,
+ SQDECD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00800,
+ UQDECD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00C00
+};
+
+enum SVEStackFrameAdjustmentOp {
+ SVEStackFrameAdjustmentFixed = 0x04205000,
+ SVEStackFrameAdjustmentFMask = 0xFFA0F800,
+ SVEStackFrameAdjustmentMask = 0xFFE0F800,
+ ADDVL_r_ri = SVEStackFrameAdjustmentFixed,
+ ADDPL_r_ri = SVEStackFrameAdjustmentFixed | 0x00400000
+};
+
+enum SVEStackFrameSizeOp {
+ SVEStackFrameSizeFixed = 0x04BF5000,
+ SVEStackFrameSizeFMask = 0xFFFFF800,
+ SVEStackFrameSizeMask = 0xFFFFF800,
+ RDVL_r_i = SVEStackFrameSizeFixed
+};
+
+enum SVEStoreMultipleStructures_ScalarPlusImmOp {
+ SVEStoreMultipleStructures_ScalarPlusImmFixed = 0xE410E000,
+ SVEStoreMultipleStructures_ScalarPlusImmFMask = 0xFE10E000,
+ SVEStoreMultipleStructures_ScalarPlusImmMask = 0xFFF0E000,
+ ST2B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00200000,
+ ST3B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00400000,
+ ST4B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00600000,
+ ST2H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00A00000,
+ ST3H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00C00000,
+ ST4H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00E00000,
+ ST2W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01200000,
+ ST3W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01400000,
+ ST4W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01600000,
+ ST2D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01A00000,
+ ST3D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01C00000,
+ ST4D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVEStoreMultipleStructures_ScalarPlusScalarOp {
+ SVEStoreMultipleStructures_ScalarPlusScalarFixed = 0xE4006000,
+ SVEStoreMultipleStructures_ScalarPlusScalarFMask = 0xFE00E000,
+ SVEStoreMultipleStructures_ScalarPlusScalarMask = 0xFFE0E000,
+ ST2B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00200000,
+ ST3B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00400000,
+ ST4B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00600000,
+ ST2H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00A00000,
+ ST3H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00C00000,
+ ST4H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00E00000,
+ ST2W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01200000,
+ ST3W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01400000,
+ ST4W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01600000,
+ ST2D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01A00000,
+ ST3D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01C00000,
+ ST4D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVEStorePredicateRegisterOp {
+ SVEStorePredicateRegisterFixed = 0xE5800000,
+ SVEStorePredicateRegisterFMask = 0xFFC0E010,
+ SVEStorePredicateRegisterMask = 0xFFC0E010,
+ STR_p_bi = SVEStorePredicateRegisterFixed
+};
+
+enum SVEStoreVectorRegisterOp {
+ SVEStoreVectorRegisterFixed = 0xE5804000,
+ SVEStoreVectorRegisterFMask = 0xFFC0E000,
+ SVEStoreVectorRegisterMask = 0xFFC0E000,
+ STR_z_bi = SVEStoreVectorRegisterFixed
+};
+
+enum SVETableLookupOp {
+ SVETableLookupFixed = 0x05203000,
+ SVETableLookupFMask = 0xFF20FC00,
+ SVETableLookupMask = 0xFF20FC00,
+ TBL_z_zz_1 = SVETableLookupFixed
+};
+
+enum SVEUnpackPredicateElementsOp {
+ SVEUnpackPredicateElementsFixed = 0x05304000,
+ SVEUnpackPredicateElementsFMask = 0xFFFEFE10,
+ SVEUnpackPredicateElementsMask = 0xFFFFFE10,
+ PUNPKLO_p_p = SVEUnpackPredicateElementsFixed,
+ PUNPKHI_p_p = SVEUnpackPredicateElementsFixed | 0x00010000
+};
+
+enum SVEUnpackVectorElementsOp {
+ SVEUnpackVectorElementsFixed = 0x05303800,
+ SVEUnpackVectorElementsFMask = 0xFF3CFC00,
+ SVEUnpackVectorElementsMask = 0xFF3FFC00,
+ SUNPKLO_z_z = SVEUnpackVectorElementsFixed,
+ SUNPKHI_z_z = SVEUnpackVectorElementsFixed | 0x00010000,
+ UUNPKLO_z_z = SVEUnpackVectorElementsFixed | 0x00020000,
+ UUNPKHI_z_z = SVEUnpackVectorElementsFixed | 0x00030000
+};
+
+enum SVEVectorSelectOp {
+ SVEVectorSelectFixed = 0x0520C000,
+ SVEVectorSelectFMask = 0xFF20C000,
+ SVEVectorSelectMask = 0xFF20C000,
+ SEL_z_p_zz = SVEVectorSelectFixed
+};
+
+enum SVEVectorSplice_DestructiveOp {
+ SVEVectorSplice_DestructiveFixed = 0x052C8000,
+ SVEVectorSplice_DestructiveFMask = 0xFF3FE000,
+ SVEVectorSplice_DestructiveMask = 0xFF3FE000,
+ SPLICE_z_p_zz_des = SVEVectorSplice_DestructiveFixed
+};
+
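Each of the op enums above follows this file's Fixed/FMask/Mask convention: masking an instruction word with FMask and comparing against Fixed identifies the encoding group, and the full Mask isolates the specific operation (the same Mask(FMask) == Fixed pattern used elsewhere in this patch, e.g. for SystemHintFMask). A minimal editorial sketch of that check for the SVEPredicateLogical group, using plain integers rather than VIXL's Instruction type; the function name is illustrative only:

#include <cstdint>

// Classify 'instr' against the SVEPredicateLogical group using the values
// defined above. Returns true for any instruction in the group, and sets
// *is_sel for the specific SEL_p_p_pp encoding.
bool ClassifySVEPredicateLogical(uint32_t instr, bool* is_sel) {
  const uint32_t kFixed = 0x25004000;  // SVEPredicateLogicalFixed
  const uint32_t kFMask = 0xFF30C000;  // SVEPredicateLogicalFMask
  const uint32_t kMask = 0xFFF0C210;   // SVEPredicateLogicalMask
  if ((instr & kFMask) != kFixed) return false;
  *is_sel = ((instr & kMask) == (kFixed | 0x00000210));  // SEL_p_p_pp
  return true;
}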
enum ReservedOp {
ReservedFixed = 0x00000000,
ReservedFMask = 0x1E000000,
ReservedMask = 0xFFFF0000,
-
UDF = ReservedFixed | 0x00000000
};
diff --git a/src/aarch64/cpu-aarch64.cc b/src/aarch64/cpu-aarch64.cc
index f5e4fca5..a31e010d 100644
--- a/src/aarch64/cpu-aarch64.cc
+++ b/src/aarch64/cpu-aarch64.cc
@@ -39,10 +39,15 @@ namespace aarch64 {
const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
+const IDRegister::Field AA64PFR0::kRAS(28);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);
+const IDRegister::Field AA64PFR0::kCSV2(56);
+const IDRegister::Field AA64PFR0::kCSV3(60);
const IDRegister::Field AA64PFR1::kBT(0);
+const IDRegister::Field AA64PFR1::kSSBS(4);
+const IDRegister::Field AA64PFR1::kMTE(8);
const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
@@ -56,6 +61,7 @@ const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);
+const IDRegister::Field AA64ISAR0::kRNDR(60);
const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
@@ -68,23 +74,41 @@ const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);
+const IDRegister::Field AA64ISAR1::kBF16(44);
+const IDRegister::Field AA64ISAR1::kDGH(48);
+const IDRegister::Field AA64ISAR1::kI8MM(52);
const IDRegister::Field AA64MMFR1::kLO(16);
+const IDRegister::Field AA64MMFR2::kAT(32);
+
+const IDRegister::Field AA64ZFR0::kBF16(20);
+const IDRegister::Field AA64ZFR0::kI8MM(44);
+const IDRegister::Field AA64ZFR0::kF32MM(52);
+const IDRegister::Field AA64ZFR0::kF64MM(56);
+
CPUFeatures AA64PFR0::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
+ if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
+ if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
+ if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
+ if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
return f;
}
CPUFeatures AA64PFR1::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
+ if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
+ if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
+ if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
+ if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
return f;
}
@@ -105,20 +129,38 @@ CPUFeatures AA64ISAR0::GetCPUFeatures() const {
if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
+ if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
return f;
}
CPUFeatures AA64ISAR1::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
+ if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
+ if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
+ if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
+ if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
+ if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
+ if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);
+
+ // Only one of these fields should be non-zero, but they have the same
+ // encodings, so merge the logic.
+ int apx = std::max(Get(kAPI), Get(kAPA));
+ if (apx >= 1) {
+ f.Combine(CPUFeatures::kPAuth);
+ // APA (rather than API) indicates QARMA.
+ if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
+ if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
+ if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
+ if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
+ if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
+ }
- if (Get(kAPI) >= 1) f.Combine(CPUFeatures::kPAuth);
- if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuth, CPUFeatures::kPAuthQARMA);
if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
if (Get(kGPA) >= 1) {
f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
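As a concrete reading of the thresholds above (an editorial example, not part of the patch): if ID_AA64ISAR1_EL1.APA reads 0b0101 and API reads zero, apx is 0b0101 and the block combines kPAuth, kPAuthQARMA, kPAuthEnhancedPAC2, kPAuthFPAC and kPAuthFPACCombined; kPAuthEnhancedPAC is only reported for the exact value 0b0010, as the equality test above reflects.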
@@ -132,6 +174,23 @@ CPUFeatures AA64MMFR1::GetCPUFeatures() const {
return f;
}
+CPUFeatures AA64MMFR2::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
+ return f;
+}
+
+CPUFeatures AA64ZFR0::GetCPUFeatures() const {
+ // This register is only available with SVE, but reads-as-zero in its absence,
+ // so it's always safe to read it.
+ CPUFeatures f;
+ if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
+ if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
+ if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
+ if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
+ return f;
+}
+
int IDRegister::Get(IDRegister::Field field) const {
int msb = field.GetMsb();
int lsb = field.GetLsb();
@@ -149,7 +208,8 @@ int IDRegister::Get(IDRegister::Field field) const {
CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
CPUFeatures f;
-#define VIXL_COMBINE_ID_REG(NAME) f.Combine(Read##NAME().GetCPUFeatures());
+#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
+ f.Combine(Read##NAME().GetCPUFeatures());
VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
#undef VIXL_COMBINE_ID_REG
return f;
@@ -163,49 +223,73 @@ CPUFeatures CPU::InferCPUFeaturesFromOS(
// Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
// than explicit bits, but explicit bits allow us to identify features that
// the toolchain doesn't know about.
- static const CPUFeatures::Feature kFeatureBits[] = {
- // Bits 0-7
- CPUFeatures::kFP,
- CPUFeatures::kNEON,
- CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track.
- CPUFeatures::kAES,
- CPUFeatures::kPmull1Q,
- CPUFeatures::kSHA1,
- CPUFeatures::kSHA2,
- CPUFeatures::kCRC32,
- // Bits 8-15
- CPUFeatures::kAtomics,
- CPUFeatures::kFPHalf,
- CPUFeatures::kNEONHalf,
- CPUFeatures::kIDRegisterEmulation,
- CPUFeatures::kRDM,
- CPUFeatures::kJSCVT,
- CPUFeatures::kFcma,
- CPUFeatures::kRCpc,
- // Bits 16-23
- CPUFeatures::kDCPoP,
- CPUFeatures::kSHA3,
- CPUFeatures::kSM3,
- CPUFeatures::kSM4,
- CPUFeatures::kDotProduct,
- CPUFeatures::kSHA512,
- CPUFeatures::kSVE,
- CPUFeatures::kFHM,
- // Bits 24-27
- CPUFeatures::kDIT,
- CPUFeatures::kUSCAT,
- CPUFeatures::kRCpcImm,
- CPUFeatures::kFlagM
- // Bits 28-31 are unassigned.
- };
- static const size_t kFeatureBitCount =
- sizeof(kFeatureBits) / sizeof(kFeatureBits[0]);
-
- unsigned long auxv = getauxval(AT_HWCAP); // NOLINT(runtime/int)
-
- VIXL_STATIC_ASSERT(kFeatureBitCount < (sizeof(auxv) * kBitsPerByte));
- for (size_t i = 0; i < kFeatureBitCount; i++) {
- if (auxv & (1UL << i)) features.Combine(kFeatureBits[i]);
+ static const CPUFeatures::Feature kFeatureBits[] =
+ {// Bits 0-7
+ CPUFeatures::kFP,
+ CPUFeatures::kNEON,
+ CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track.
+ CPUFeatures::kAES,
+ CPUFeatures::kPmull1Q,
+ CPUFeatures::kSHA1,
+ CPUFeatures::kSHA2,
+ CPUFeatures::kCRC32,
+ // Bits 8-15
+ CPUFeatures::kAtomics,
+ CPUFeatures::kFPHalf,
+ CPUFeatures::kNEONHalf,
+ CPUFeatures::kIDRegisterEmulation,
+ CPUFeatures::kRDM,
+ CPUFeatures::kJSCVT,
+ CPUFeatures::kFcma,
+ CPUFeatures::kRCpc,
+ // Bits 16-23
+ CPUFeatures::kDCPoP,
+ CPUFeatures::kSHA3,
+ CPUFeatures::kSM3,
+ CPUFeatures::kSM4,
+ CPUFeatures::kDotProduct,
+ CPUFeatures::kSHA512,
+ CPUFeatures::kSVE,
+ CPUFeatures::kFHM,
+ // Bits 24-31
+ CPUFeatures::kDIT,
+ CPUFeatures::kUSCAT,
+ CPUFeatures::kRCpcImm,
+ CPUFeatures::kFlagM,
+ CPUFeatures::kSSBSControl,
+ CPUFeatures::kSB,
+ CPUFeatures::kPAuth,
+ CPUFeatures::kPAuthGeneric,
+ // Bits 32-39
+ CPUFeatures::kDCCVADP,
+ CPUFeatures::kNone, // "sve2"
+ CPUFeatures::kNone, // "sveaes"
+ CPUFeatures::kNone, // "svepmull"
+ CPUFeatures::kNone, // "svebitperm"
+ CPUFeatures::kNone, // "svesha3"
+ CPUFeatures::kNone, // "svesm4"
+ CPUFeatures::kFrintToFixedSizedInt,
+ // Bits 40-47
+ CPUFeatures::kSVEI8MM,
+ CPUFeatures::kSVEF32MM,
+ CPUFeatures::kSVEF64MM,
+ CPUFeatures::kSVEBF16,
+ CPUFeatures::kI8MM,
+ CPUFeatures::kBF16,
+ CPUFeatures::kDGH,
+ CPUFeatures::kRNG,
+ // Bits 48+
+ CPUFeatures::kBTI};
+
+ uint64_t hwcap_low32 = getauxval(AT_HWCAP);
+ uint64_t hwcap_high32 = getauxval(AT_HWCAP2);
+ VIXL_ASSERT(IsUint32(hwcap_low32));
+ VIXL_ASSERT(IsUint32(hwcap_high32));
+ uint64_t hwcap = hwcap_low32 | (hwcap_high32 << 32);
+
+ VIXL_STATIC_ASSERT(ArrayLength(kFeatureBits) < 64);
+ for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) {
+ if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]);
}
#endif // VIXL_USE_LINUX_HWCAP
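A minimal sketch of how the combined value feeds this table, assuming a Linux host (getauxval and AT_HWCAP/AT_HWCAP2 come from <sys/auxv.h>; the function name is illustrative). Bit 22 of the combined word is the kernel's "sve" capability, which the table above maps to CPUFeatures::kSVE, and the first HWCAP2 bit lands at position 32, mapping to kDCCVADP:

#include <cstdint>
#include <sys/auxv.h>

// Rebuild the 64-bit capability word the same way the loop above does and
// test the SVE bit (index 22 in kFeatureBits).
bool LinuxReportsSVE() {
  uint64_t hwcap = static_cast<uint64_t>(getauxval(AT_HWCAP)) |
                   (static_cast<uint64_t>(getauxval(AT_HWCAP2)) << 32);
  return (hwcap & (UINT64_C(1) << 22)) != 0;
}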
@@ -218,17 +302,17 @@ CPUFeatures CPU::InferCPUFeaturesFromOS(
#ifdef __aarch64__
-#define VIXL_READ_ID_REG(NAME) \
- NAME CPU::Read##NAME() { \
- uint64_t value = 0; \
- __asm__("mrs %0, ID_" #NAME "_EL1" : "=r"(value)); \
- return NAME(value); \
+#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
+ NAME CPU::Read##NAME() { \
+ uint64_t value = 0; \
+ __asm__("mrs %0, " MRS_ARG : "=r"(value)); \
+ return NAME(value); \
}
#else // __aarch64__
-#define VIXL_READ_ID_REG(NAME) \
- NAME CPU::Read##NAME() { \
- /* TODO: Use VIXL_UNREACHABLE once it works in release builds. */ \
- VIXL_ABORT(); \
+#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
+ NAME CPU::Read##NAME() { \
+ VIXL_UNREACHABLE(); \
+ return NAME(0); \
}
#endif // __aarch64__
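For reference, the __aarch64__ branch of this macro expands as follows for the AA64MMFR2 entry of the register list (expansion reconstructed from the macro body and its "S3_0_C0_C7_2" argument; reformatted for readability):

AA64MMFR2 CPU::ReadAA64MMFR2() {
  uint64_t value = 0;
  // The encoded S3_<op1>_<Cn>_<Cm>_<op2> name assembles even when the
  // toolchain does not recognise ID_AA64MMFR2_EL1 symbolically.
  __asm__("mrs %0, S3_0_C0_C7_2" : "=r"(value));
  return AA64MMFR2(value);
}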
@@ -282,6 +366,27 @@ uint32_t CPU::GetCacheType() {
}
+// Query the SVE vector length. This requires CPUFeatures::kSVE.
+int CPU::ReadSVEVectorLengthInBits() {
+#ifdef __aarch64__
+ uint64_t vl;
+ // To support compilers that don't understand `rdvl`, encode the value
+ // directly and move it manually.
+ __asm__(
+ " .word 0x04bf5100\n" // rdvl x0, #8
+ " mov %[vl], x0\n"
+ : [vl] "=r"(vl)
+ :
+ : "x0");
+ VIXL_ASSERT(vl <= INT_MAX);
+ return static_cast<int>(vl);
+#else
+ VIXL_UNREACHABLE();
+ return 0;
+#endif
+}
+
+
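A hedged usage sketch: the helper below (name illustrative) guards the call behind an SVE check, since rdvl is undefined without SVE. It assumes the CPUFeatures::InferFromOS and CPUFeatures::Has helpers declared in cpu-features.h, and include paths relative to VIXL's src/ directory:

#include "aarch64/cpu-aarch64.h"
#include "cpu-features.h"

// Returns the host's SVE vector length in bits, or 0 when SVE is absent.
int QueryHostSVEVectorLength() {
  vixl::CPUFeatures features = vixl::CPUFeatures::InferFromOS();
  if (!features.Has(vixl::CPUFeatures::kSVE)) return 0;
  return vixl::aarch64::CPU::ReadSVEVectorLengthInBits();
}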
void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
#ifdef __aarch64__
// Implement the cache synchronisation for all targets where AArch64 is the
diff --git a/src/aarch64/cpu-aarch64.h b/src/aarch64/cpu-aarch64.h
index d2b2ee87..2bf1e60f 100644
--- a/src/aarch64/cpu-aarch64.h
+++ b/src/aarch64/cpu-aarch64.h
@@ -56,7 +56,11 @@ class IDRegister {
public:
enum Type { kUnsigned, kSigned };
- explicit Field(int lsb, Type type = kUnsigned) : lsb_(lsb), type_(type) {}
+ // This needs to be constexpr so that fields have "constant initialisation".
+ // This avoids initialisation order problems when these values are used to
+ // (dynamically) initialise static variables, etc.
+ explicit constexpr Field(int lsb, Type type = kUnsigned)
+ : lsb_(lsb), type_(type) {}
static const int kMaxWidthInBits = 4;
@@ -92,8 +96,11 @@ class AA64PFR0 : public IDRegister {
private:
static const Field kFP;
static const Field kAdvSIMD;
+ static const Field kRAS;
static const Field kSVE;
static const Field kDIT;
+ static const Field kCSV2;
+ static const Field kCSV3;
};
class AA64PFR1 : public IDRegister {
@@ -104,6 +111,8 @@ class AA64PFR1 : public IDRegister {
private:
static const Field kBT;
+ static const Field kSSBS;
+ static const Field kMTE;
};
class AA64ISAR0 : public IDRegister {
@@ -125,6 +134,7 @@ class AA64ISAR0 : public IDRegister {
static const Field kDP;
static const Field kFHM;
static const Field kTS;
+ static const Field kRNDR;
};
class AA64ISAR1 : public IDRegister {
@@ -145,6 +155,9 @@ class AA64ISAR1 : public IDRegister {
static const Field kFRINTTS;
static const Field kSB;
static const Field kSPECRES;
+ static const Field kBF16;
+ static const Field kDGH;
+ static const Field kI8MM;
};
class AA64MMFR1 : public IDRegister {
@@ -157,6 +170,29 @@ class AA64MMFR1 : public IDRegister {
static const Field kLO;
};
+class AA64MMFR2 : public IDRegister {
+ public:
+ explicit AA64MMFR2(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kAT;
+};
+
+class AA64ZFR0 : public IDRegister {
+ public:
+ explicit AA64ZFR0(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kBF16;
+ static const Field kI8MM;
+ static const Field kF32MM;
+ static const Field kF64MM;
+};
+
class CPU {
public:
// Initialise CPU support.
@@ -184,6 +220,9 @@ class CPU {
CPUFeatures::QueryIDRegistersOption option =
CPUFeatures::kQueryIDRegistersIfAvailable);
+ // Query the SVE vector length. This requires CPUFeatures::kSVE.
+ static int ReadSVEVectorLengthInBits();
+
// Handle tagged pointers.
template <typename T>
static T SetPointerTag(T pointer, uint64_t tag) {
@@ -211,14 +250,18 @@ class CPU {
}
private:
-#define VIXL_AARCH64_ID_REG_LIST(V) \
- V(AA64PFR0) \
- V(AA64PFR1) \
- V(AA64ISAR0) \
- V(AA64ISAR1) \
- V(AA64MMFR1)
-
-#define VIXL_READ_ID_REG(NAME) static NAME Read##NAME();
+#define VIXL_AARCH64_ID_REG_LIST(V) \
+ V(AA64PFR0, "ID_AA64PFR0_EL1") \
+ V(AA64PFR1, "ID_AA64PFR1_EL1") \
+ V(AA64ISAR0, "ID_AA64ISAR0_EL1") \
+ V(AA64ISAR1, "ID_AA64ISAR1_EL1") \
+ V(AA64MMFR1, "ID_AA64MMFR1_EL1") \
+ /* These registers are RES0 in the baseline Arm8.0. We can always safely */ \
+ /* read them, but some compilers don't accept the symbolic names. */ \
+ V(AA64MMFR2, "S3_0_C0_C7_2") \
+ V(AA64ZFR0, "S3_0_C0_C4_4")
+
+#define VIXL_READ_ID_REG(NAME, MRS_ARG) static NAME Read##NAME();
// On native AArch64 platforms, read the named CPU ID registers. These require
// CPUFeatures::kIDRegisterEmulation, and should not be called on non-AArch64
// platforms.
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index 474803a1..abe63d39 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -870,7 +870,6 @@ void CPUFeaturesAuditor::VisitNEONModifiedImmediate(const Instruction* instr) {
scope.Record(CPUFeatures::kFP);
if (instr->ExtractBit(11)) scope.Record(CPUFeatures::kNEONHalf);
}
- USE(instr);
}
void CPUFeaturesAuditor::VisitNEONPerm(const Instruction* instr) {
@@ -1068,6 +1067,165 @@ void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) {
USE(instr);
}
+// Most SVE visitors require only SVE.
+#define VIXL_SIMPLE_SVE_VISITOR_LIST(V) \
+ V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \
+ V(SVE32BitGatherLoad_VectorPlusImm) \
+ V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \
+ V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \
+ V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \
+ V(SVE32BitGatherPrefetch_VectorPlusImm) \
+ V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \
+ V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \
+ V(SVE32BitScatterStore_VectorPlusImm) \
+ V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \
+ V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \
+ V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \
+ V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \
+ V(SVE64BitGatherLoad_VectorPlusImm) \
+ V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \
+ V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \
+ V(SVE64BitGatherPrefetch_VectorPlusImm) \
+ V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \
+ V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \
+ V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \
+ V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \
+ V(SVE64BitScatterStore_VectorPlusImm) \
+ V(SVEAddressGeneration) \
+ V(SVEBitwiseLogicalUnpredicated) \
+ V(SVEBitwiseShiftUnpredicated) \
+ V(SVEFFRInitialise) \
+ V(SVEFFRWriteFromPredicate) \
+ V(SVEFPAccumulatingReduction) \
+ V(SVEFPArithmeticUnpredicated) \
+ V(SVEFPCompareVectors) \
+ V(SVEFPCompareWithZero) \
+ V(SVEFPComplexAddition) \
+ V(SVEFPComplexMulAdd) \
+ V(SVEFPComplexMulAddIndex) \
+ V(SVEFPFastReduction) \
+ V(SVEFPMulIndex) \
+ V(SVEFPMulAdd) \
+ V(SVEFPMulAddIndex) \
+ V(SVEFPUnaryOpUnpredicated) \
+ V(SVEIncDecByPredicateCount) \
+ V(SVEIndexGeneration) \
+ V(SVEIntArithmeticUnpredicated) \
+ V(SVEIntCompareSignedImm) \
+ V(SVEIntCompareUnsignedImm) \
+ V(SVEIntCompareVectors) \
+ V(SVEIntMulAddPredicated) \
+ V(SVEIntMulAddUnpredicated) \
+ V(SVEIntReduction) \
+ V(SVEIntUnaryArithmeticPredicated) \
+ V(SVEMovprfx) \
+ V(SVEMulIndex) \
+ V(SVEPermuteVectorExtract) \
+ V(SVEPermuteVectorInterleaving) \
+ V(SVEPredicateCount) \
+ V(SVEPredicateLogical) \
+ V(SVEPropagateBreak) \
+ V(SVEStackFrameAdjustment) \
+ V(SVEStackFrameSize) \
+ V(SVEVectorSelect) \
+ V(SVEBitwiseLogical_Predicated) \
+ V(SVEBitwiseLogicalWithImm_Unpredicated) \
+ V(SVEBitwiseShiftByImm_Predicated) \
+ V(SVEBitwiseShiftByVector_Predicated) \
+ V(SVEBitwiseShiftByWideElements_Predicated) \
+ V(SVEBroadcastBitmaskImm) \
+ V(SVEBroadcastFPImm_Unpredicated) \
+ V(SVEBroadcastGeneralRegister) \
+ V(SVEBroadcastIndexElement) \
+ V(SVEBroadcastIntImm_Unpredicated) \
+ V(SVECompressActiveElements) \
+ V(SVEConditionallyBroadcastElementToVector) \
+ V(SVEConditionallyExtractElementToSIMDFPScalar) \
+ V(SVEConditionallyExtractElementToGeneralRegister) \
+ V(SVEConditionallyTerminateScalars) \
+ V(SVEConstructivePrefix_Unpredicated) \
+ V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \
+ V(SVEContiguousLoad_ScalarPlusImm) \
+ V(SVEContiguousLoad_ScalarPlusScalar) \
+ V(SVEContiguousNonFaultLoad_ScalarPlusImm) \
+ V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \
+ V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \
+ V(SVEContiguousNonTemporalStore_ScalarPlusImm) \
+ V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \
+ V(SVEContiguousPrefetch_ScalarPlusImm) \
+ V(SVEContiguousPrefetch_ScalarPlusScalar) \
+ V(SVEContiguousStore_ScalarPlusImm) \
+ V(SVEContiguousStore_ScalarPlusScalar) \
+ V(SVECopySIMDFPScalarRegisterToVector_Predicated) \
+ V(SVECopyFPImm_Predicated) \
+ V(SVECopyGeneralRegisterToVector_Predicated) \
+ V(SVECopyIntImm_Predicated) \
+ V(SVEElementCount) \
+ V(SVEExtractElementToSIMDFPScalarRegister) \
+ V(SVEExtractElementToGeneralRegister) \
+ V(SVEFPArithmetic_Predicated) \
+ V(SVEFPArithmeticWithImm_Predicated) \
+ V(SVEFPConvertPrecision) \
+ V(SVEFPConvertToInt) \
+ V(SVEFPExponentialAccelerator) \
+ V(SVEFPRoundToIntegralValue) \
+ V(SVEFPTrigMulAddCoefficient) \
+ V(SVEFPTrigSelectCoefficient) \
+ V(SVEFPUnaryOp) \
+ V(SVEIncDecRegisterByElementCount) \
+ V(SVEIncDecVectorByElementCount) \
+ V(SVEInsertSIMDFPScalarRegister) \
+ V(SVEInsertGeneralRegister) \
+ V(SVEIntAddSubtractImm_Unpredicated) \
+ V(SVEIntAddSubtractVectors_Predicated) \
+ V(SVEIntCompareScalarCountAndLimit) \
+ V(SVEIntConvertToFP) \
+ V(SVEIntDivideVectors_Predicated) \
+ V(SVEIntMinMaxImm_Unpredicated) \
+ V(SVEIntMinMaxDifference_Predicated) \
+ V(SVEIntMulImm_Unpredicated) \
+ V(SVEIntMulVectors_Predicated) \
+ V(SVELoadAndBroadcastElement) \
+ V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \
+ V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \
+ V(SVELoadMultipleStructures_ScalarPlusImm) \
+ V(SVELoadMultipleStructures_ScalarPlusScalar) \
+ V(SVELoadPredicateRegister) \
+ V(SVELoadVectorRegister) \
+ V(SVEPartitionBreakCondition) \
+ V(SVEPermutePredicateElements) \
+ V(SVEPredicateFirstActive) \
+ V(SVEPredicateInitialize) \
+ V(SVEPredicateNextActive) \
+ V(SVEPredicateReadFromFFR_Predicated) \
+ V(SVEPredicateReadFromFFR_Unpredicated) \
+ V(SVEPredicateTest) \
+ V(SVEPredicateZero) \
+ V(SVEPropagateBreakToNextPartition) \
+ V(SVEReversePredicateElements) \
+ V(SVEReverseVectorElements) \
+ V(SVEReverseWithinElements) \
+ V(SVESaturatingIncDecRegisterByElementCount) \
+ V(SVESaturatingIncDecVectorByElementCount) \
+ V(SVEStoreMultipleStructures_ScalarPlusImm) \
+ V(SVEStoreMultipleStructures_ScalarPlusScalar) \
+ V(SVEStorePredicateRegister) \
+ V(SVEStoreVectorRegister) \
+ V(SVETableLookup) \
+ V(SVEUnpackPredicateElements) \
+ V(SVEUnpackVectorElements) \
+ V(SVEVectorSplice_Destructive)
+
+#define VIXL_DEFINE_SIMPLE_SVE_VISITOR(NAME) \
+ void CPUFeaturesAuditor::Visit##NAME(const Instruction* instr) { \
+ RecordInstructionFeaturesScope scope(this); \
+ scope.Record(CPUFeatures::kSVE); \
+ USE(instr); \
+ }
+VIXL_SIMPLE_SVE_VISITOR_LIST(VIXL_DEFINE_SIMPLE_SVE_VISITOR)
+#undef VIXL_DEFINE_SIMPLE_SVE_VISITOR
+#undef VIXL_SIMPLE_SVE_VISITOR_LIST
+
void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
diff --git a/src/aarch64/decoder-aarch64.cc b/src/aarch64/decoder-aarch64.cc
index ce1f33fb..c6859bbc 100644
--- a/src/aarch64/decoder-aarch64.cc
+++ b/src/aarch64/decoder-aarch64.cc
@@ -182,22 +182,45 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask) {
case M: \
bit_extract_fn = &Instruction::ExtractBits<M>; \
break;
+ INSTANTIATE_TEMPLATE(0x000001e0);
+ INSTANTIATE_TEMPLATE(0x00000400);
INSTANTIATE_TEMPLATE(0x00000800);
INSTANTIATE_TEMPLATE(0x00000c00);
+ INSTANTIATE_TEMPLATE(0x00001000);
+ INSTANTIATE_TEMPLATE(0x00001800);
INSTANTIATE_TEMPLATE(0x00001c00);
INSTANTIATE_TEMPLATE(0x00004000);
INSTANTIATE_TEMPLATE(0x00008000);
INSTANTIATE_TEMPLATE(0x0000f000);
INSTANTIATE_TEMPLATE(0x0000fc00);
+ INSTANTIATE_TEMPLATE(0x00060010);
+ INSTANTIATE_TEMPLATE(0x00093e00);
+ INSTANTIATE_TEMPLATE(0x000c1000);
+ INSTANTIATE_TEMPLATE(0x00100000);
+ INSTANTIATE_TEMPLATE(0x00101800);
+ INSTANTIATE_TEMPLATE(0x00140000);
+ INSTANTIATE_TEMPLATE(0x00180000);
+ INSTANTIATE_TEMPLATE(0x00181000);
+ INSTANTIATE_TEMPLATE(0x00190000);
+ INSTANTIATE_TEMPLATE(0x00191400);
+ INSTANTIATE_TEMPLATE(0x001c0000);
+ INSTANTIATE_TEMPLATE(0x001c1800);
INSTANTIATE_TEMPLATE(0x001f0000);
INSTANTIATE_TEMPLATE(0x0020fc00);
INSTANTIATE_TEMPLATE(0x0038f000);
INSTANTIATE_TEMPLATE(0x00400000);
+ INSTANTIATE_TEMPLATE(0x00400010);
INSTANTIATE_TEMPLATE(0x0040f000);
+ INSTANTIATE_TEMPLATE(0x00500000);
INSTANTIATE_TEMPLATE(0x00800000);
+ INSTANTIATE_TEMPLATE(0x00800010);
+ INSTANTIATE_TEMPLATE(0x00801800);
+ INSTANTIATE_TEMPLATE(0x009f0000);
INSTANTIATE_TEMPLATE(0x00c00000);
+ INSTANTIATE_TEMPLATE(0x00c00010);
INSTANTIATE_TEMPLATE(0x00cf8000);
INSTANTIATE_TEMPLATE(0x00db0000);
+ INSTANTIATE_TEMPLATE(0x00dc0000);
INSTANTIATE_TEMPLATE(0x00e00003);
INSTANTIATE_TEMPLATE(0x00f80400);
INSTANTIATE_TEMPLATE(0x01e00000);
@@ -233,6 +256,7 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask) {
INSTANTIATE_TEMPLATE(0xc4400000);
INSTANTIATE_TEMPLATE(0xc4c00000);
INSTANTIATE_TEMPLATE(0xe0400000);
+ INSTANTIATE_TEMPLATE(0xe120e000);
INSTANTIATE_TEMPLATE(0xe3c00000);
INSTANTIATE_TEMPLATE(0xf1200000);
#undef INSTANTIATE_TEMPLATE
@@ -259,20 +283,44 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask, uint32_t value) {
instantiated = true; \
}
INSTANTIATE_TEMPLATE(0x0000001c, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00000210, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x000003c0, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00001c00, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00001c0f, 0x00000000);
INSTANTIATE_TEMPLATE(0x00003000, 0x00000000);
INSTANTIATE_TEMPLATE(0x00007800, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x0000e000, 0x0000a000);
INSTANTIATE_TEMPLATE(0x0000f000, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00030400, 0x00000000);
INSTANTIATE_TEMPLATE(0x0003801f, 0x0000000d);
+ INSTANTIATE_TEMPLATE(0x00060210, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00060810, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00060a10, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00060bf0, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00061e10, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00061e10, 0x00000400);
+ INSTANTIATE_TEMPLATE(0x00070200, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x000b1e10, 0x00000000);
INSTANTIATE_TEMPLATE(0x000f0000, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00130e1f, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00130fff, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00180000, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00180000, 0x00100000);
+ INSTANTIATE_TEMPLATE(0x001e0000, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x001f0000, 0x00000000);
INSTANTIATE_TEMPLATE(0x001f0000, 0x001f0000);
INSTANTIATE_TEMPLATE(0x0038e000, 0x00000000);
INSTANTIATE_TEMPLATE(0x0039e000, 0x00002000);
INSTANTIATE_TEMPLATE(0x003ae000, 0x00002000);
INSTANTIATE_TEMPLATE(0x003ce000, 0x00042000);
+ INSTANTIATE_TEMPLATE(0x005f0000, 0x001f0000);
INSTANTIATE_TEMPLATE(0x00780000, 0x00000000);
+ INSTANTIATE_TEMPLATE(0x00870210, 0x00000000);
INSTANTIATE_TEMPLATE(0x00c00000, 0x00000000);
INSTANTIATE_TEMPLATE(0x00c00000, 0x00800000);
INSTANTIATE_TEMPLATE(0x00c00000, 0x00c00000);
+ INSTANTIATE_TEMPLATE(0x00c00010, 0x00800000);
+ INSTANTIATE_TEMPLATE(0x00ca1e10, 0x00000000);
INSTANTIATE_TEMPLATE(0x01000010, 0x00000000);
INSTANTIATE_TEMPLATE(0x20000800, 0x00000000);
INSTANTIATE_TEMPLATE(0x20008000, 0x00000000);
@@ -312,14 +360,16 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask, uint32_t value) {
bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) {
// EitherOr optimisation: if there are only one or two patterns in the table,
// try to optimise the node to exploit that.
- if ((pattern_table_.size() == 2) && (GetSampledBitsCount() > 1)) {
+ size_t table_size = pattern_table_.size();
+ if ((table_size <= 2) && (GetSampledBitsCount() > 1)) {
// TODO: support 'x' in this optimisation by dropping the sampled bit
// positions before making the mask/value.
if ((strchr(pattern_table_[0].pattern, 'x') == NULL) &&
- (strcmp(pattern_table_[1].pattern, "otherwise") == 0)) {
+ ((table_size == 1) ||
+ (strcmp(pattern_table_[1].pattern, "otherwise") == 0))) {
// A pattern table consisting of a fixed pattern with no x's, and an
- // "otherwise" case. Optimise this into an instruction mask and value
- // test.
+ // "otherwise" or absent case. Optimise this into an instruction mask and
+ // value test.
uint32_t single_decode_mask = 0;
uint32_t single_decode_value = 0;
std::vector<uint8_t> bits = GetSampledBits();
@@ -332,7 +382,6 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) {
single_decode_value |= 1U << bits[i];
}
}
-
BitExtractFn bit_extract_fn =
GetBitExtractFunction(single_decode_mask, single_decode_value);
@@ -342,7 +391,9 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) {
// Set DecodeNode for when the instruction after masking doesn't match the
// value.
- CompileNodeForBits(decoder, pattern_table_[1].handler, 0);
+ const char* doesnt_match_handler =
+ (table_size == 1) ? "VisitUnallocated" : pattern_table_[1].handler;
+ CompileNodeForBits(decoder, doesnt_match_handler, 0);
// Set DecodeNode for when it does match.
CompileNodeForBits(decoder, pattern_table_[0].handler, 1);
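As a worked example of the single-pattern case this change enables: the new "UnallocSVEStackFrameSize" mapping in decoder-constants-aarch64.h samples bits {22, 20, 19, 18, 17, 16} and carries the single x-free pattern "011111", so the node compiles down to the mask (1 << 22) | (1 << 20) | ... | (1 << 16) = 0x005f0000 with value 0x001f0000, dispatching to VisitSVEStackFrameSize on a match and to VisitUnallocated otherwise. This is also why the (0x005f0000, 0x001f0000) pair appears in the new INSTANTIATE_TEMPLATE entries above.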
diff --git a/src/aarch64/decoder-aarch64.h b/src/aarch64/decoder-aarch64.h
index c0f47c36..38540195 100644
--- a/src/aarch64/decoder-aarch64.h
+++ b/src/aarch64/decoder-aarch64.h
@@ -38,99 +38,239 @@
// List macro containing all visitors needed by the decoder class.
-#define VISITOR_LIST_THAT_RETURN(V) \
- V(AddSubExtended) \
- V(AddSubImmediate) \
- V(AddSubShifted) \
- V(AddSubWithCarry) \
- V(AtomicMemory) \
- V(Bitfield) \
- V(CompareBranch) \
- V(ConditionalBranch) \
- V(ConditionalCompareImmediate) \
- V(ConditionalCompareRegister) \
- V(ConditionalSelect) \
- V(Crypto2RegSHA) \
- V(Crypto3RegSHA) \
- V(CryptoAES) \
- V(DataProcessing1Source) \
- V(DataProcessing2Source) \
- V(DataProcessing3Source) \
- V(Exception) \
- V(Extract) \
- V(EvaluateIntoFlags) \
- V(FPCompare) \
- V(FPConditionalCompare) \
- V(FPConditionalSelect) \
- V(FPDataProcessing1Source) \
- V(FPDataProcessing2Source) \
- V(FPDataProcessing3Source) \
- V(FPFixedPointConvert) \
- V(FPImmediate) \
- V(FPIntegerConvert) \
- V(LoadLiteral) \
- V(LoadStoreExclusive) \
- V(LoadStorePAC) \
- V(LoadStorePairNonTemporal) \
- V(LoadStorePairOffset) \
- V(LoadStorePairPostIndex) \
- V(LoadStorePairPreIndex) \
- V(LoadStorePostIndex) \
- V(LoadStorePreIndex) \
- V(LoadStoreRCpcUnscaledOffset) \
- V(LoadStoreRegisterOffset) \
- V(LoadStoreUnscaledOffset) \
- V(LoadStoreUnsignedOffset) \
- V(LogicalImmediate) \
- V(LogicalShifted) \
- V(MoveWideImmediate) \
- V(NEON2RegMisc) \
- V(NEON2RegMiscFP16) \
- V(NEON3Different) \
- V(NEON3Same) \
- V(NEON3SameExtra) \
- V(NEON3SameFP16) \
- V(NEONAcrossLanes) \
- V(NEONByIndexedElement) \
- V(NEONCopy) \
- V(NEONExtract) \
- V(NEONLoadStoreMultiStruct) \
- V(NEONLoadStoreMultiStructPostIndex) \
- V(NEONLoadStoreSingleStruct) \
- V(NEONLoadStoreSingleStructPostIndex) \
- V(NEONModifiedImmediate) \
- V(NEONPerm) \
- V(NEONScalar2RegMisc) \
- V(NEONScalar2RegMiscFP16) \
- V(NEONScalar3Diff) \
- V(NEONScalar3Same) \
- V(NEONScalar3SameExtra) \
- V(NEONScalar3SameFP16) \
- V(NEONScalarByIndexedElement) \
- V(NEONScalarCopy) \
- V(NEONScalarPairwise) \
- V(NEONScalarShiftImmediate) \
- V(NEONShiftImmediate) \
- V(NEONTable) \
- V(PCRelAddressing) \
- V(RotateRightIntoFlags) \
- V(System) \
- V(TestBranch) \
- V(UnconditionalBranch) \
- V(UnconditionalBranchToRegister)
-
-// TODO: We shouldn't expose debug-only behaviour like this. Instead, we should
-// use release-mode aborts where appropriate, and merge these into a single
-// no-return list.
-#define VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(V) \
- V(Unallocated) \
+#define VISITOR_LIST_THAT_RETURN(V) \
+ V(AddSubExtended) \
+ V(AddSubImmediate) \
+ V(AddSubShifted) \
+ V(AddSubWithCarry) \
+ V(AtomicMemory) \
+ V(Bitfield) \
+ V(CompareBranch) \
+ V(ConditionalBranch) \
+ V(ConditionalCompareImmediate) \
+ V(ConditionalCompareRegister) \
+ V(ConditionalSelect) \
+ V(Crypto2RegSHA) \
+ V(Crypto3RegSHA) \
+ V(CryptoAES) \
+ V(DataProcessing1Source) \
+ V(DataProcessing2Source) \
+ V(DataProcessing3Source) \
+ V(EvaluateIntoFlags) \
+ V(Exception) \
+ V(Extract) \
+ V(FPCompare) \
+ V(FPConditionalCompare) \
+ V(FPConditionalSelect) \
+ V(FPDataProcessing1Source) \
+ V(FPDataProcessing2Source) \
+ V(FPDataProcessing3Source) \
+ V(FPFixedPointConvert) \
+ V(FPImmediate) \
+ V(FPIntegerConvert) \
+ V(LoadLiteral) \
+ V(LoadStoreExclusive) \
+ V(LoadStorePAC) \
+ V(LoadStorePairNonTemporal) \
+ V(LoadStorePairOffset) \
+ V(LoadStorePairPostIndex) \
+ V(LoadStorePairPreIndex) \
+ V(LoadStorePostIndex) \
+ V(LoadStorePreIndex) \
+ V(LoadStoreRCpcUnscaledOffset) \
+ V(LoadStoreRegisterOffset) \
+ V(LoadStoreUnscaledOffset) \
+ V(LoadStoreUnsignedOffset) \
+ V(LogicalImmediate) \
+ V(LogicalShifted) \
+ V(MoveWideImmediate) \
+ V(NEON2RegMisc) \
+ V(NEON2RegMiscFP16) \
+ V(NEON3Different) \
+ V(NEON3Same) \
+ V(NEON3SameExtra) \
+ V(NEON3SameFP16) \
+ V(NEONAcrossLanes) \
+ V(NEONByIndexedElement) \
+ V(NEONCopy) \
+ V(NEONExtract) \
+ V(NEONLoadStoreMultiStruct) \
+ V(NEONLoadStoreMultiStructPostIndex) \
+ V(NEONLoadStoreSingleStruct) \
+ V(NEONLoadStoreSingleStructPostIndex) \
+ V(NEONModifiedImmediate) \
+ V(NEONPerm) \
+ V(NEONScalar2RegMisc) \
+ V(NEONScalar2RegMiscFP16) \
+ V(NEONScalar3Diff) \
+ V(NEONScalar3Same) \
+ V(NEONScalar3SameExtra) \
+ V(NEONScalar3SameFP16) \
+ V(NEONScalarByIndexedElement) \
+ V(NEONScalarCopy) \
+ V(NEONScalarPairwise) \
+ V(NEONScalarShiftImmediate) \
+ V(NEONShiftImmediate) \
+ V(NEONTable) \
+ V(PCRelAddressing) \
+ V(RotateRightIntoFlags) \
+ V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \
+ V(SVE32BitGatherLoad_VectorPlusImm) \
+ V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \
+ V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \
+ V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \
+ V(SVE32BitGatherPrefetch_VectorPlusImm) \
+ V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \
+ V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \
+ V(SVE32BitScatterStore_VectorPlusImm) \
+ V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \
+ V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \
+ V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \
+ V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \
+ V(SVE64BitGatherLoad_VectorPlusImm) \
+ V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \
+ V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \
+ V(SVE64BitGatherPrefetch_VectorPlusImm) \
+ V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \
+ V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \
+ V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \
+ V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \
+ V(SVE64BitScatterStore_VectorPlusImm) \
+ V(SVEAddressGeneration) \
+ V(SVEBitwiseLogicalUnpredicated) \
+ V(SVEBitwiseShiftUnpredicated) \
+ V(SVEFFRInitialise) \
+ V(SVEFFRWriteFromPredicate) \
+ V(SVEFPAccumulatingReduction) \
+ V(SVEFPArithmeticUnpredicated) \
+ V(SVEFPCompareVectors) \
+ V(SVEFPCompareWithZero) \
+ V(SVEFPComplexAddition) \
+ V(SVEFPComplexMulAdd) \
+ V(SVEFPComplexMulAddIndex) \
+ V(SVEFPFastReduction) \
+ V(SVEFPMulIndex) \
+ V(SVEFPMulAdd) \
+ V(SVEFPMulAddIndex) \
+ V(SVEFPUnaryOpUnpredicated) \
+ V(SVEIncDecByPredicateCount) \
+ V(SVEIndexGeneration) \
+ V(SVEIntArithmeticUnpredicated) \
+ V(SVEIntCompareSignedImm) \
+ V(SVEIntCompareUnsignedImm) \
+ V(SVEIntCompareVectors) \
+ V(SVEIntMulAddPredicated) \
+ V(SVEIntMulAddUnpredicated) \
+ V(SVEIntReduction) \
+ V(SVEIntUnaryArithmeticPredicated) \
+ V(SVEMovprfx) \
+ V(SVEMulIndex) \
+ V(SVEPermuteVectorExtract) \
+ V(SVEPermuteVectorInterleaving) \
+ V(SVEPredicateCount) \
+ V(SVEPredicateLogical) \
+ V(SVEPropagateBreak) \
+ V(SVEStackFrameAdjustment) \
+ V(SVEStackFrameSize) \
+ V(SVEVectorSelect) \
+ V(SVEBitwiseLogical_Predicated) \
+ V(SVEBitwiseLogicalWithImm_Unpredicated) \
+ V(SVEBitwiseShiftByImm_Predicated) \
+ V(SVEBitwiseShiftByVector_Predicated) \
+ V(SVEBitwiseShiftByWideElements_Predicated) \
+ V(SVEBroadcastBitmaskImm) \
+ V(SVEBroadcastFPImm_Unpredicated) \
+ V(SVEBroadcastGeneralRegister) \
+ V(SVEBroadcastIndexElement) \
+ V(SVEBroadcastIntImm_Unpredicated) \
+ V(SVECompressActiveElements) \
+ V(SVEConditionallyBroadcastElementToVector) \
+ V(SVEConditionallyExtractElementToSIMDFPScalar) \
+ V(SVEConditionallyExtractElementToGeneralRegister) \
+ V(SVEConditionallyTerminateScalars) \
+ V(SVEConstructivePrefix_Unpredicated) \
+ V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \
+ V(SVEContiguousLoad_ScalarPlusImm) \
+ V(SVEContiguousLoad_ScalarPlusScalar) \
+ V(SVEContiguousNonFaultLoad_ScalarPlusImm) \
+ V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \
+ V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \
+ V(SVEContiguousNonTemporalStore_ScalarPlusImm) \
+ V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \
+ V(SVEContiguousPrefetch_ScalarPlusImm) \
+ V(SVEContiguousPrefetch_ScalarPlusScalar) \
+ V(SVEContiguousStore_ScalarPlusImm) \
+ V(SVEContiguousStore_ScalarPlusScalar) \
+ V(SVECopySIMDFPScalarRegisterToVector_Predicated) \
+ V(SVECopyFPImm_Predicated) \
+ V(SVECopyGeneralRegisterToVector_Predicated) \
+ V(SVECopyIntImm_Predicated) \
+ V(SVEElementCount) \
+ V(SVEExtractElementToSIMDFPScalarRegister) \
+ V(SVEExtractElementToGeneralRegister) \
+ V(SVEFPArithmetic_Predicated) \
+ V(SVEFPArithmeticWithImm_Predicated) \
+ V(SVEFPConvertPrecision) \
+ V(SVEFPConvertToInt) \
+ V(SVEFPExponentialAccelerator) \
+ V(SVEFPRoundToIntegralValue) \
+ V(SVEFPTrigMulAddCoefficient) \
+ V(SVEFPTrigSelectCoefficient) \
+ V(SVEFPUnaryOp) \
+ V(SVEIncDecRegisterByElementCount) \
+ V(SVEIncDecVectorByElementCount) \
+ V(SVEInsertSIMDFPScalarRegister) \
+ V(SVEInsertGeneralRegister) \
+ V(SVEIntAddSubtractImm_Unpredicated) \
+ V(SVEIntAddSubtractVectors_Predicated) \
+ V(SVEIntCompareScalarCountAndLimit) \
+ V(SVEIntConvertToFP) \
+ V(SVEIntDivideVectors_Predicated) \
+ V(SVEIntMinMaxImm_Unpredicated) \
+ V(SVEIntMinMaxDifference_Predicated) \
+ V(SVEIntMulImm_Unpredicated) \
+ V(SVEIntMulVectors_Predicated) \
+ V(SVELoadAndBroadcastElement) \
+ V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \
+ V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \
+ V(SVELoadMultipleStructures_ScalarPlusImm) \
+ V(SVELoadMultipleStructures_ScalarPlusScalar) \
+ V(SVELoadPredicateRegister) \
+ V(SVELoadVectorRegister) \
+ V(SVEPartitionBreakCondition) \
+ V(SVEPermutePredicateElements) \
+ V(SVEPredicateFirstActive) \
+ V(SVEPredicateInitialize) \
+ V(SVEPredicateNextActive) \
+ V(SVEPredicateReadFromFFR_Predicated) \
+ V(SVEPredicateReadFromFFR_Unpredicated) \
+ V(SVEPredicateTest) \
+ V(SVEPredicateZero) \
+ V(SVEPropagateBreakToNextPartition) \
+ V(SVEReversePredicateElements) \
+ V(SVEReverseVectorElements) \
+ V(SVEReverseWithinElements) \
+ V(SVESaturatingIncDecRegisterByElementCount) \
+ V(SVESaturatingIncDecVectorByElementCount) \
+ V(SVEStoreMultipleStructures_ScalarPlusImm) \
+ V(SVEStoreMultipleStructures_ScalarPlusScalar) \
+ V(SVEStorePredicateRegister) \
+ V(SVEStoreVectorRegister) \
+ V(SVETableLookup) \
+ V(SVEUnpackPredicateElements) \
+ V(SVEUnpackVectorElements) \
+ V(SVEVectorSplice_Destructive) \
+ V(System) \
+ V(TestBranch) \
+ V(Unallocated) \
+ V(UnconditionalBranch) \
+ V(UnconditionalBranchToRegister) \
V(Unimplemented)
#define VISITOR_LIST_THAT_DONT_RETURN(V) V(Reserved)
-#define VISITOR_LIST(V) \
- VISITOR_LIST_THAT_RETURN(V) \
- VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(V) \
+#define VISITOR_LIST(V) \
+ VISITOR_LIST_THAT_RETURN(V) \
VISITOR_LIST_THAT_DONT_RETURN(V)
namespace vixl {
@@ -138,6 +278,12 @@ namespace aarch64 {
// The Visitor interface. Disassembler and simulator (and other tools)
// must provide implementations for all of these functions.
+//
+// Note that this class must change in breaking ways with even minor additions
+// to VIXL, and so its API should be considered unstable. User classes that
+// inherit from this one should be expected to break even on minor version
+// updates. If this is a problem, consider using DecoderVisitorWithDefaults
+// instead.
class DecoderVisitor {
public:
enum VisitorConstness { kConstVisitor, kNonConstVisitor };
@@ -160,6 +306,25 @@ class DecoderVisitor {
const VisitorConstness constness_;
};
+// As above, but a default (no-op) implementation for each visitor is provided.
+// This is useful for derived classes that only care about specific visitors.
+//
+// A minor version update may add a visitor, but will never remove one, so it is
+// safe (and recommended) to use `override` in derived classes.
+class DecoderVisitorWithDefaults : public DecoderVisitor {
+ public:
+ explicit DecoderVisitorWithDefaults(
+ VisitorConstness constness = kConstVisitor)
+ : DecoderVisitor(constness) {}
+
+ virtual ~DecoderVisitorWithDefaults() {}
+
+#define DECLARE(A) \
+ virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE { USE(instr); }
+ VISITOR_LIST(DECLARE)
+#undef DECLARE
+};
+
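A minimal sketch of the intended use, assuming only the interface declared here (the class and accessor names are illustrative): override a single visitor and let the no-op defaults absorb everything else.

#include "aarch64/decoder-aarch64.h"
#include "aarch64/instructions-aarch64.h"

// Counts add/sub (immediate) instructions and ignores all other encodings.
class AddSubImmCounter : public vixl::aarch64::DecoderVisitorWithDefaults {
 public:
  AddSubImmCounter() : count_(0) {}
  void VisitAddSubImmediate(const vixl::aarch64::Instruction*) override {
    count_++;
  }
  int count() const { return count_; }

 private:
  int count_;
};

Registered with a Decoder in the usual way (e.g. decoder.AppendVisitor(&counter)), the counter is invoked for every decoded instruction, but only the overridden visitor does any work.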
class DecodeNode;
class CompiledDecodeNode;
@@ -257,7 +422,7 @@ class Decoder {
};
const int kMaxDecodeSampledBits = 16;
-const int kMaxDecodeMappings = 22;
+const int kMaxDecodeMappings = 100;
typedef void (Decoder::*DecodeFnPtr)(const Instruction*);
typedef uint32_t (Instruction::*BitExtractFn)(void) const;
diff --git a/src/aarch64/decoder-constants-aarch64.h b/src/aarch64/decoder-constants-aarch64.h
index def27fa1..53f283bb 100644
--- a/src/aarch64/decoder-constants-aarch64.h
+++ b/src/aarch64/decoder-constants-aarch64.h
@@ -39,6 +39,7 @@ static const DecodeMapping kDecodeMapping[] = {
{ "Root",
{28, 27, 26, 25},
{ {"0000", "DecodeReserved"},
+ {"0010", "DecodeSVE"},
{"100x", "DecodeDataProcessingImmediate"},
{"101x", "DecodeBranchesExceptionAndSystem"},
{"x1x0", "DecodeLoadsAndStores"},
@@ -124,6 +125,720 @@ static const DecodeMapping kDecodeMapping[] = {
},
},
+ { "DecodeSVE",
+ {31, 30, 29, 24, 21, 15, 14, 13},
+ { {"00000x1x", "VisitSVEIntMulAddPredicated"},
+ {"00000000", "DecodeSVE00000000"},
+ {"00000001", "DecodeSVE00000001"},
+ {"00000100", "DecodeSVE00000100"},
+ {"00000101", "VisitSVEIntUnaryArithmeticPredicated"},
+ {"00001000", "VisitSVEIntArithmeticUnpredicated"},
+ {"00001001", "VisitSVEBitwiseLogicalUnpredicated"},
+ {"00001010", "DecodeSVE00001010"},
+ {"00001100", "VisitSVEBitwiseShiftUnpredicated"},
+ {"00001101", "DecodeSVE00001101"},
+ {"00001110", "DecodeSVE00001110"},
+ {"00001111", "DecodeSVE00001111"},
+ {"000100xx", "DecodeSVE000100xx"},
+ {"0001010x", "DecodeSVE0001010x"},
+ {"00010110", "DecodeSVE00010110"},
+ {"00010111", "DecodeSVE00010111"},
+ {"00011000", "VisitSVEPermuteVectorExtract"},
+ {"00011001", "DecodeSVE00011001"},
+ {"00011010", "DecodeSVE00011010"},
+ {"00011011", "VisitSVEPermuteVectorInterleaving"},
+ {"00011100", "DecodeSVE00011100"},
+ {"00011101", "DecodeSVE00011101"},
+ {"0001111x", "VisitSVEVectorSelect"},
+ {"00100xxx", "VisitSVEIntCompareVectors"},
+ {"00101xxx", "VisitSVEIntCompareUnsignedImm"},
+ {"00110x0x", "VisitSVEIntCompareSignedImm"},
+ {"0011001x", "DecodeSVE0011001x"},
+ {"00110110", "DecodeSVE00110110"},
+ {"00110111", "DecodeSVE00110111"},
+ {"00111000", "VisitSVEIntCompareScalarCountAndLimit"},
+ {"00111001", "UnallocSVEConditionallyTerminateScalars"},
+ {"00111100", "DecodeSVE00111100"},
+ {"00111101", "UnallocSVEPredicateCount"},
+ {"0011111x", "DecodeSVE0011111x"},
+ {"010000xx", "VisitSVEIntMulAddUnpredicated"},
+ {"01001xxx", "VisitSVEMulIndex"},
+ {"011000xx", "VisitSVEFPComplexMulAdd"},
+ {"01100100", "UnallocSVEFPComplexAddition"},
+ {"01101000", "DecodeSVE01101000"},
+ {"01101001", "UnallocSVEFPMulIndex"},
+ {"01110x1x", "VisitSVEFPCompareVectors"},
+ {"01110000", "VisitSVEFPArithmeticUnpredicated"},
+ {"01110001", "DecodeSVE01110001"},
+ {"01110100", "DecodeSVE01110100"},
+ {"01110101", "DecodeSVE01110101"},
+ {"01111xxx", "VisitSVEFPMulAdd"},
+ {"100x010x", "UnallocSVELoadAndBroadcastElement"},
+ {"100x0110", "DecodeSVE100x0110"},
+ {"100x0111", "DecodeSVE100x0111"},
+ {"100x11xx", "DecodeSVE100x11xx"},
+ {"100000xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"},
+ {"100010xx", "DecodeSVE100010xx"},
+ {"100100x1", "DecodeSVE100100x1"},
+ {"10010000", "DecodeSVE10010000"},
+ {"10010010", "DecodeSVE10010010"},
+ {"100110x1", "DecodeSVE100110x1"},
+ {"10011000", "DecodeSVE10011000"},
+ {"10011010", "DecodeSVE10011010"},
+ {"101xx000", "VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar"},
+ {"101xx001", "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm"},
+ {"101xx010", "VisitSVEContiguousLoad_ScalarPlusScalar"},
+ {"101xx011", "VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar"},
+ {"101xx101", "DecodeSVE101xx101"},
+ {"101x0110", "DecodeSVE101x0110"},
+ {"101x0111", "DecodeSVE101x0111"},
+ {"101x1110", "VisitSVELoadMultipleStructures_ScalarPlusScalar"},
+ {"101x1111", "DecodeSVE101x1111"},
+ {"110x00xx", "VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets"},
+ {"110x0111", "DecodeSVE110x0111"},
+ {"1100010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ {"11000110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ {"110010xx", "DecodeSVE110010xx"},
+ {"110011xx", "DecodeSVE110011xx"},
+ {"1101010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ {"11010110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ {"110110xx", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"},
+ {"110111xx", "DecodeSVE110111xx"},
+ {"111x0011", "DecodeSVE111x0011"},
+ {"111x01x0", "DecodeSVE111x01x0"},
+ {"111x0101", "DecodeSVE111x0101"},
+ {"111x0111", "DecodeSVE111x0111"},
+ {"111x1011", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"},
+ {"111x11x0", "DecodeSVE111x11x0"},
+ {"111x1101", "DecodeSVE111x1101"},
+ {"111x1111", "DecodeSVE111x1111"},
+ {"1110x010", "VisitSVEContiguousStore_ScalarPlusScalar"},
+ {"1111x000", "UnallocSVEStorePredicateRegister"},
+ {"1111x010", "DecodeSVE1111x010"},
+ },
+ },
+
+ { "DecodeSVE00000000",
+ {20, 19, 18},
+ { {"00x", "VisitSVEIntAddSubtractVectors_Predicated"},
+ {"01x", "VisitSVEIntMinMaxDifference_Predicated"},
+ {"100", "VisitSVEIntMulVectors_Predicated"},
+ {"101", "VisitSVEIntDivideVectors_Predicated"},
+ {"11x", "VisitSVEBitwiseLogical_Predicated"},
+ },
+ },
+
+ { "DecodeSVE00000100",
+ {20, 19},
+ { {"0x", "VisitSVEBitwiseShiftByImm_Predicated"},
+ {"10", "VisitSVEBitwiseShiftByVector_Predicated"},
+ {"11", "VisitSVEBitwiseShiftByWideElements_Predicated"},
+ },
+ },
+
+ { "DecodeSVE00001010",
+ {23, 12, 11},
+ { {"x0x", "VisitSVEIndexGeneration"},
+ {"010", "VisitSVEStackFrameAdjustment"},
+ {"110", "UnallocSVEStackFrameSize"},
+ },
+ },
+
+ { "UnallocSVEStackFrameSize",
+ {22, 20, 19, 18, 17, 16},
+ { {"011111", "VisitSVEStackFrameSize"},
+ },
+ },
+
+ { "DecodeSVE00001101",
+ {12, 11, 10},
+ { {"0xx", "VisitSVEAddressGeneration"},
+ {"10x", "VisitSVEFPTrigSelectCoefficient"},
+ {"110", "VisitSVEFPExponentialAccelerator"},
+ {"111", "VisitSVEConstructivePrefix_Unpredicated"},
+ },
+ },
+
+ { "DecodeSVE00001110",
+ {20, 12, 11},
+ { {"00x", "VisitSVESaturatingIncDecVectorByElementCount"},
+ {"100", "VisitSVEIncDecVectorByElementCount"},
+ },
+ },
+
+ { "DecodeSVE00001111",
+ {20, 12, 11},
+ { {"x1x", "VisitSVESaturatingIncDecRegisterByElementCount"},
+ {"000", "VisitSVEElementCount"},
+ {"100", "VisitSVEIncDecRegisterByElementCount"},
+ },
+ },
+
+ { "DecodeSVE000100xx",
+ {23, 22, 20, 19, 18},
+ { {"xx1xx", "VisitSVECopyIntImm_Predicated"},
+ {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
+ {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
+ {"11000", "VisitSVEBroadcastBitmaskImm"},
+ },
+ },
+
+ { "DecodeSVE0001010x",
+ {23, 22, 20, 19, 18},
+ { {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
+ {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
+ {"11000", "VisitSVEBroadcastBitmaskImm"},
+ },
+ },
+
+ { "DecodeSVE00010110",
+ {23, 22, 20, 19, 18},
+ { {"xx1xx", "VisitSVECopyFPImm_Predicated"},
+ {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
+ {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
+ {"11000", "VisitSVEBroadcastBitmaskImm"},
+ },
+ },
+
+ { "DecodeSVE00010111",
+ {23, 22, 20, 19, 18},
+ { {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
+ {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
+ {"11000", "VisitSVEBroadcastBitmaskImm"},
+ },
+ },
+
+ { "UnallocSVEBroadcastIndexElement",
+ {10},
+ { {"0", "VisitSVEBroadcastIndexElement"},
+ },
+ },
+
+ { "UnallocSVETableLookup",
+ {10},
+ { {"0", "VisitSVETableLookup"},
+ },
+ },
+
+ { "UnallocSVEBroadcastGeneralRegister",
+ {17, 16, 10},
+ { {"000", "VisitSVEBroadcastGeneralRegister"},
+ },
+ },
+
+ { "UnallocSVEInsertGeneralRegister",
+ {17, 16, 10},
+ { {"000", "VisitSVEInsertGeneralRegister"},
+ },
+ },
+
+ { "UnallocSVEUnpackVectorElements",
+ {10},
+ { {"0", "VisitSVEUnpackVectorElements"},
+ },
+ },
+
+ { "UnallocSVEInsertSIMDFPScalarRegister",
+ {17, 16, 10},
+ { {"000", "VisitSVEInsertSIMDFPScalarRegister"},
+ },
+ },
+
+ { "UnallocSVEReverseVectorElements",
+ {17, 16, 10},
+ { {"000", "VisitSVEReverseVectorElements"},
+ },
+ },
+
+ { "DecodeSVE00011001",
+ {20, 19, 18, 12, 11},
+ { {"xxx00", "UnallocSVEBroadcastIndexElement"},
+ {"xxx10", "UnallocSVETableLookup"},
+ {"00011", "UnallocSVEBroadcastGeneralRegister"},
+ {"00111", "UnallocSVEInsertGeneralRegister"},
+ {"10011", "UnallocSVEUnpackVectorElements"},
+ {"10111", "UnallocSVEInsertSIMDFPScalarRegister"},
+ {"11011", "UnallocSVEReverseVectorElements"},
+ },
+ },
+
+ { "UnallocSVEPermutePredicateElements",
+ {9, 4},
+ { {"00", "VisitSVEPermutePredicateElements"},
+ },
+ },
+
+ { "UnallocSVEUnpackPredicateElements",
+ {23, 22, 19, 17, 12, 11, 10, 9, 4},
+ { {"000000000", "VisitSVEUnpackPredicateElements"},
+ },
+ },
+
+ { "UnallocSVEReversePredicateElements",
+ {19, 17, 16, 12, 11, 10, 9, 4},
+ { {"00000000", "VisitSVEReversePredicateElements"},
+ },
+ },
+
+ { "DecodeSVE00011010",
+ {20, 18},
+ { {"0x", "UnallocSVEPermutePredicateElements"},
+ {"10", "UnallocSVEUnpackPredicateElements"},
+ {"11", "UnallocSVEReversePredicateElements"},
+ },
+ },
+
+ { "DecodeSVE00011100",
+ {23, 20, 19, 18, 17, 16},
+ { {"x00000", "VisitSVECopySIMDFPScalarRegisterToVector_Predicated"},
+ {"x0001x", "VisitSVEExtractElementToSIMDFPScalarRegister"},
+ {"x001xx", "VisitSVEReverseWithinElements"},
+ {"x0100x", "VisitSVEConditionallyBroadcastElementToVector"},
+ {"x0101x", "VisitSVEConditionallyExtractElementToSIMDFPScalar"},
+ {"x01100", "VisitSVEVectorSplice_Destructive"},
+ {"100001", "VisitSVECompressActiveElements"},
+ },
+ },
+
+ { "DecodeSVE00011101",
+ {20, 19, 18, 17, 16},
+ { {"0000x", "VisitSVEExtractElementToGeneralRegister"},
+ {"01000", "VisitSVECopyGeneralRegisterToVector_Predicated"},
+ {"1000x", "VisitSVEConditionallyExtractElementToGeneralRegister"},
+ },
+ },
+
+ { "UnallocSVEPartitionBreakCondition",
+ {18, 17, 16, 9},
+ { {"0000", "VisitSVEPartitionBreakCondition"},
+ },
+ },
+
+ { "UnallocSVEPropagateBreakToNextPartition",
+ {23, 18, 17, 16, 9, 4},
+ { {"000000", "VisitSVEPropagateBreakToNextPartition"},
+ },
+ },
+
+ { "DecodeSVE0011001x",
+ {20, 19},
+ { {"0x", "VisitSVEPredicateLogical"},
+ {"10", "UnallocSVEPartitionBreakCondition"},
+ {"11", "UnallocSVEPropagateBreakToNextPartition"},
+ },
+ },
+
+ { "UnallocSVEPredicateTest",
+ {18, 17, 9, 4},
+ { {"0000", "VisitSVEPredicateTest"},
+ },
+ },
+
+ { "UnallocSVEPredicateFirstActive",
+ {18, 17, 12, 11, 10, 9, 4},
+ { {"0000000", "VisitSVEPredicateFirstActive"},
+ },
+ },
+
+ { "UnallocSVEPredicateNextActive",
+ {18, 17, 12, 11, 10, 9, 4},
+ { {"0000100", "VisitSVEPredicateNextActive"},
+ },
+ },
+
+ { "DecodeSVE00110110",
+ {20, 19, 16},
+ { {"0xx", "VisitSVEPropagateBreak"},
+ {"100", "UnallocSVEPredicateTest"},
+ {"110", "UnallocSVEPredicateFirstActive"},
+ {"111", "UnallocSVEPredicateNextActive"},
+ },
+ },
+
+ { "UnallocSVEPredicateTest",
+ {18, 17, 9, 4},
+ { {"0000", "VisitSVEPredicateTest"},
+ },
+ },
+
+ { "UnallocSVEPredicateInitialize",
+ {18, 17, 11, 4},
+ { {"0000", "VisitSVEPredicateInitialize"},
+ },
+ },
+
+ { "UnallocSVEPredicateZero",
+ {18, 17, 11, 9, 8, 7, 6, 5, 4},
+ { {"000000000", "VisitSVEPredicateZero"},
+ },
+ },
+
+ { "UnallocSVEPredicateReadFromFFR_Predicated",
+ {18, 17, 11, 9, 4},
+ { {"00000", "VisitSVEPredicateReadFromFFR_Predicated"},
+ },
+ },
+
+ { "UnallocSVEPredicateReadFromFFR_Unpredicated",
+ {18, 17, 11, 9, 8, 7, 6, 5, 4},
+ { {"000000000", "VisitSVEPredicateReadFromFFR_Unpredicated"},
+ },
+ },
+
+ { "DecodeSVE00110111",
+ {20, 19, 16, 12, 10},
+ { {"0xxxx", "VisitSVEPropagateBreak"},
+ {"100xx", "UnallocSVEPredicateTest"},
+ {"11x00", "UnallocSVEPredicateInitialize"},
+ {"11001", "UnallocSVEPredicateZero"},
+ {"11010", "UnallocSVEPredicateReadFromFFR_Predicated"},
+ {"11110", "UnallocSVEPredicateReadFromFFR_Unpredicated"},
+ },
+ },
+
+ { "UnallocSVEConditionallyTerminateScalars",
+ {12, 11, 10, 3, 2, 1, 0},
+ { {"0000000", "VisitSVEConditionallyTerminateScalars"},
+ },
+ },
+
+ { "UnallocSVEPredicateCount_2",
+ {20},
+ { {"0", "VisitSVEPredicateCount"},
+ },
+ },
+
+ { "UnallocSVEIncDecByPredicateCount",
+ {20},
+ { {"0", "VisitSVEIncDecByPredicateCount"},
+ },
+ },
+
+ { "UnallocSVEFFRWriteFromPredicate",
+ {20, 17, 16, 11, 10, 9, 4, 3, 2, 1, 0},
+ { {"00000000000", "VisitSVEFFRWriteFromPredicate"},
+ },
+ },
+
+ { "UnallocSVEFFRInitialise",
+ {20, 17, 16, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
+ { {"000000000000000", "VisitSVEFFRInitialise"},
+ },
+ },
+
+ { "DecodeSVE00111100",
+ {19, 18, 12},
+ { {"0xx", "UnallocSVEPredicateCount_2"},
+ {"1x0", "UnallocSVEIncDecByPredicateCount"},
+ {"101", "UnallocSVEFFRWriteFromPredicate"},
+ {"111", "UnallocSVEFFRInitialise"},
+ },
+ },
+
+ { "UnallocSVEPredicateCount",
+ {20, 19},
+ { {"00", "VisitSVEPredicateCount"},
+ },
+ },
+
+ { "DecodeSVE0011111x",
+ {20, 19, 16},
+ { {"00x", "VisitSVEIntAddSubtractImm_Unpredicated"},
+ {"01x", "VisitSVEIntMinMaxImm_Unpredicated"},
+ {"10x", "VisitSVEIntMulImm_Unpredicated"},
+ {"110", "VisitSVEBroadcastIntImm_Unpredicated"},
+ {"111", "VisitSVEBroadcastFPImm_Unpredicated"},
+ },
+ },
+
+ { "UnallocSVEFPComplexAddition",
+ {20, 19, 18, 17},
+ { {"0000", "VisitSVEFPComplexAddition"},
+ },
+ },
+
+ { "DecodeSVE01101000",
+ {12, 11},
+ { {"00", "VisitSVEFPMulAddIndex"},
+ {"1x", "VisitSVEFPComplexMulAddIndex"},
+ },
+ },
+
+ { "UnallocSVEFPMulIndex",
+ {12, 11, 10},
+ { {"000", "VisitSVEFPMulIndex"},
+ },
+ },
+
+ { "DecodeSVE01110001",
+ {20, 19, 12},
+ { {"00x", "VisitSVEFPFastReduction"},
+ {"011", "VisitSVEFPUnaryOpUnpredicated"},
+ {"10x", "VisitSVEFPCompareWithZero"},
+ {"11x", "VisitSVEFPAccumulatingReduction"},
+ },
+ },
+
+ { "UnallocSVEFPTrigMulAddCoefficient",
+ {12, 11, 10},
+ { {"000", "VisitSVEFPTrigMulAddCoefficient"},
+ },
+ },
+
+ { "UnallocSVEFPArithmeticWithImm_Predicated",
+ {9, 8, 7, 6},
+ { {"0000", "VisitSVEFPArithmeticWithImm_Predicated"},
+ },
+ },
+
+ { "DecodeSVE01110100",
+ {20, 19},
+ { {"0x", "VisitSVEFPArithmetic_Predicated"},
+ {"10", "UnallocSVEFPTrigMulAddCoefficient"},
+ {"11", "UnallocSVEFPArithmeticWithImm_Predicated"},
+ },
+ },
+
+ { "DecodeSVE01110101",
+ {20, 19, 18},
+ { {"00x", "VisitSVEFPRoundToIntegralValue"},
+ {"010", "VisitSVEFPConvertPrecision"},
+ {"011", "VisitSVEFPUnaryOp"},
+ {"10x", "VisitSVEIntConvertToFP"},
+ {"11x", "VisitSVEFPConvertToInt"},
+ },
+ },
+
+ { "UnallocSVELoadAndBroadcastElement",
+ {22},
+ { {"1", "VisitSVELoadAndBroadcastElement"},
+ },
+ },
+
+ { "DecodeSVE100x0110",
+ {22, 4},
+ { {"00", "VisitSVEContiguousPrefetch_ScalarPlusScalar"},
+ {"1x", "VisitSVELoadAndBroadcastElement"},
+ },
+ },
+
+ { "DecodeSVE100x0111",
+ {22, 4},
+ { {"00", "VisitSVE32BitGatherPrefetch_VectorPlusImm"},
+ {"1x", "VisitSVELoadAndBroadcastElement"},
+ },
+ },
+
+ { "DecodeSVE100x11xx",
+ {22},
+ { {"0", "VisitSVE32BitGatherLoad_VectorPlusImm"},
+ {"1", "VisitSVELoadAndBroadcastElement"},
+ },
+ },
+
+ { "DecodeSVE100010xx",
+ {23, 4},
+ { {"00", "VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets"},
+ {"1x", "VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets"},
+ },
+ },
+
+ { "DecodeSVE100100x1",
+ {23, 22, 4},
+ { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"},
+ {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ },
+ },
+
+ { "DecodeSVE10010000",
+ {23, 22, 4},
+ { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"},
+ {"100", "VisitSVELoadPredicateRegister"},
+ {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ },
+ },
+
+ { "DecodeSVE10010010",
+ {23, 22, 4},
+ { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"},
+ {"10x", "VisitSVELoadVectorRegister"},
+ {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ },
+ },
+
+ { "DecodeSVE100110x1",
+ {23, 22, 4},
+ { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"},
+ {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ },
+ },
+
+ { "DecodeSVE10011000",
+ {23, 22, 4},
+ { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"},
+ {"100", "VisitSVELoadPredicateRegister"},
+ {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ },
+ },
+
+ { "DecodeSVE10011010",
+ {23, 22, 4},
+ { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"},
+ {"10x", "VisitSVELoadVectorRegister"},
+ {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ },
+ },
+
+ { "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm",
+ {20},
+ { {"0", "VisitSVELoadAndBroadcastQuadword_ScalarPlusImm"},
+ },
+ },
+
+ { "DecodeSVE101xx101",
+ {20},
+ { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"},
+ {"1", "VisitSVEContiguousNonFaultLoad_ScalarPlusImm"},
+ },
+ },
+
+ { "DecodeSVE101x0110",
+ {22},
+ { {"0", "VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar"},
+ {"1", "VisitSVELoadMultipleStructures_ScalarPlusScalar"},
+ },
+ },
+
+ { "DecodeSVE101x0111",
+ {22, 20},
+ { {"00", "VisitSVEContiguousNonTemporalLoad_ScalarPlusImm"},
+ {"10", "VisitSVELoadMultipleStructures_ScalarPlusImm"},
+ },
+ },
+
+ { "DecodeSVE101x1111",
+ {22, 20},
+ { {"x0", "VisitSVELoadMultipleStructures_ScalarPlusImm"},
+ },
+ },
+
+ { "DecodeSVE110x0111",
+ {22, 4},
+ { {"00", "VisitSVE64BitGatherPrefetch_VectorPlusImm"},
+ {"1x", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ },
+ },
+
+ { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets",
+ {22},
+ { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ },
+ },
+
+ { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets",
+ {22},
+ { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ },
+ },
+
+ { "DecodeSVE110010xx",
+ {23, 4},
+ { {"00", "VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets"},
+ {"1x", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"},
+ },
+ },
+
+ { "DecodeSVE110011xx",
+ {23, 22, 4},
+ { {"x0x", "VisitSVE64BitGatherLoad_VectorPlusImm"},
+ {"010", "VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets"},
+ {"11x", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"},
+ },
+ },
+
+ { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets",
+ {22},
+ { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ },
+ },
+
+ { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets",
+ {22},
+ { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ },
+ },
+
+ { "DecodeSVE110111xx",
+ {22},
+ { {"0", "VisitSVE64BitGatherLoad_VectorPlusImm"},
+ {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"},
+ },
+ },
+
+ { "DecodeSVE111x0011",
+ {22},
+ { {"0", "VisitSVEContiguousNonTemporalStore_ScalarPlusScalar"},
+ {"1", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"},
+ },
+ },
+
+ { "DecodeSVE111x01x0",
+ {22},
+ { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets"},
+ {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets"},
+ },
+ },
+
+ { "DecodeSVE111x0101",
+ {22},
+ { {"0", "VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets"},
+ {"1", "VisitSVE64BitScatterStore_VectorPlusImm"},
+ },
+ },
+
+ { "DecodeSVE111x0111",
+ {22, 20},
+ { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"},
+ {"01", "VisitSVEContiguousNonTemporalStore_ScalarPlusImm"},
+ {"11", "VisitSVEStoreMultipleStructures_ScalarPlusImm"},
+ },
+ },
+
+ { "DecodeSVE111x11x0",
+ {22},
+ { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets"},
+ {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets"},
+ },
+ },
+
+ { "DecodeSVE111x1101",
+ {22},
+ { {"0", "VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets"},
+ {"1", "VisitSVE32BitScatterStore_VectorPlusImm"},
+ },
+ },
+
+ { "DecodeSVE111x1111",
+ {22, 20},
+ { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"},
+ {"x1", "VisitSVEStoreMultipleStructures_ScalarPlusImm"},
+ },
+ },
+
+ { "UnallocSVEStorePredicateRegister",
+ {23, 22, 4},
+ { {"100", "VisitSVEStorePredicateRegister"},
+ },
+ },
+
+ { "DecodeSVE1111x010",
+ {23, 22},
+ { {"0x", "VisitSVEContiguousStore_ScalarPlusScalar"},
+ {"10", "VisitSVEStoreVectorRegister"},
+ {"11", "VisitSVEContiguousStore_ScalarPlusScalar"},
+ },
+ },
+
{ "DecodeNEONScalarAnd3SHA",
{29, 23, 22, 15, 14, 11, 10},
{ {"0xx0x00", "VisitCrypto3RegSHA"},
@@ -1388,6 +2103,28 @@ static const DecodeMapping kDecodeMapping[] = {
{"otherwise", "VisitUnconditionalBranchToRegister"},
},
},
+
+ { "DecodeSVE101xxxxx",
+ {15, 14, 13},
+ { {"101", "DecodeSVE101xx101"},
+ {"010", "VisitSVEContiguousLoad_ScalarPlusScalar"},
+ {"otherwise", "VisitSVEMemContiguousLoad"},
+ },
+ },
+
+ { "DecodeSVE101xx101",
+ {20},
+ { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"},
+ {"1", "VisitSVEMemContiguousLoad"},
+ },
+ },
+
+ { "DecodeSVE00000001",
+ {20, 19},
+ { {"10", "VisitSVEMovprfx"},
+ {"otherwise", "VisitSVEIntReduction"},
+ },
+ },
};
// clang-format on
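
The table above drives a data-directed decoder: each named node lists the instruction bits to sample, then maps bit-pattern strings (most significant listed bit first, with 'x' matching either value and "otherwise" as a catch-all) either to another node or to a Visit* handler. The sketch below shows only that matching rule; the Mapping/Node types and the Lookup() helper are illustrative assumptions, not VIXL's decoder, which builds its own decode tree from these entries.

#include <cstdint>
#include <cstddef>
#include <string>
#include <vector>

struct Mapping { std::string pattern; std::string handler; };
struct Node { std::string name; std::vector<int> bits; std::vector<Mapping> map; };

// Sample the node's bit positions from the instruction and compare them, in
// order, against each pattern; 'x' matches either bit value.
static const std::string* Lookup(const Node& node, uint32_t instr) {
  for (const Mapping& m : node.map) {
    if (m.pattern == "otherwise") return &m.handler;
    bool match = true;
    for (std::size_t i = 0; i < node.bits.size(); ++i) {
      int got = (instr >> node.bits[i]) & 1;
      char want = m.pattern[i];
      if (want != 'x' && (want - '0') != got) { match = false; break; }
    }
    if (match) return &m.handler;  // Either a Visit* name or another node name.
  }
  return nullptr;  // Nothing matched: the encoding is unallocated.
}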
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 7d6fa148..d8ac2d24 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -24,6 +24,7 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#include <bitset>
#include <cstdlib>
#include <sstream>
@@ -956,7 +957,7 @@ void Disassembler::VisitTestBranch(const Instruction *instr) {
// disassembled as Wt, otherwise Xt. As the top bit of the immediate is
// encoded in bit 31 of the instruction, we can reuse the Rt form, which
// uses bit 31 (normally "sf") to choose the register size.
- const char *form = "'Rt, 'IS, 'TImmTest";
+ const char *form = "'Rt, 'It, 'TImmTest";
switch (instr->Mask(TestBranchMask)) {
case TBZ:
@@ -1086,7 +1087,7 @@ void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction *instr) {
#undef LS_UNSIGNEDOFFSET
case PRFM_unsigned:
mnemonic = "prfm";
- form = "'PrefOp, ['Xns'ILU]";
+ form = "'prefOp, ['Xns'ILU]";
}
Format(instr, mnemonic, form);
}
@@ -1165,7 +1166,7 @@ void Disassembler::VisitLoadStoreRegisterOffset(const Instruction *instr) {
#undef LS_REGISTEROFFSET
case PRFM_reg:
mnemonic = "prfm";
- form = "'PrefOp, ['Xns, 'Offsetreg]";
+ form = "'prefOp, ['Xns, 'Offsetreg]";
}
Format(instr, mnemonic, form);
}
@@ -1180,7 +1181,7 @@ void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction *instr) {
const char *form_s = "'St, ['Xns'ILS]";
const char *form_d = "'Dt, ['Xns'ILS]";
const char *form_q = "'Qt, ['Xns'ILS]";
- const char *form_prefetch = "'PrefOp, ['Xns'ILS]";
+ const char *form_prefetch = "'prefOp, ['Xns'ILS]";
switch (instr->Mask(LoadStoreUnscaledOffsetMask)) {
case STURB_w:
@@ -1303,7 +1304,7 @@ void Disassembler::VisitLoadLiteral(const Instruction *instr) {
}
case PRFM_lit: {
mnemonic = "prfm";
- form = "'PrefOp, 'ILLiteral 'LValue";
+ form = "'prefOp, 'ILLiteral 'LValue";
break;
}
default:
@@ -1486,14 +1487,14 @@ void Disassembler::VisitLoadStorePairNonTemporal(const Instruction *instr) {
V(CASAH, "casah", "'Ws, 'Wt") \
V(CASLH, "caslh", "'Ws, 'Wt") \
V(CASALH, "casalh", "'Ws, 'Wt") \
- V(CASP_w, "casp", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
- V(CASP_x, "casp", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \
- V(CASPA_w, "caspa", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
- V(CASPA_x, "caspa", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \
- V(CASPL_w, "caspl", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
- V(CASPL_x, "caspl", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \
- V(CASPAL_w, "caspal", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \
- V(CASPAL_x, "caspal", "'Xs, 'X(s+1), 'Xt, 'X(t+1)")
+ V(CASP_w, "casp", "'Ws, 'Ws+, 'Wt, 'Wt+") \
+ V(CASP_x, "casp", "'Xs, 'Xs+, 'Xt, 'Xt+") \
+ V(CASPA_w, "caspa", "'Ws, 'Ws+, 'Wt, 'Wt+") \
+ V(CASPA_x, "caspa", "'Xs, 'Xs+, 'Xt, 'Xt+") \
+ V(CASPL_w, "caspl", "'Ws, 'Ws+, 'Wt, 'Wt+") \
+ V(CASPL_x, "caspl", "'Xs, 'Xs+, 'Xt, 'Xt+") \
+ V(CASPAL_w, "caspal", "'Ws, 'Ws+, 'Wt, 'Wt+") \
+ V(CASPAL_x, "caspal", "'Xs, 'Xs+, 'Xt, 'Xt+")
// clang-format on
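
The renamed pair directives reflect that CASP and friends operate on consecutive register pairs: the second operand of each pair is simply register s+1 (or t+1), as the old 'W(s+1) spelling made explicit. A minimal illustration of that operand layout, with a made-up helper rather than VIXL's substitution engine:

#include <cstdio>

// Print the CASP-style operand list "ws, w(s+1), wt, w(t+1)"; CASP requires
// even register numbers, so the pairs never wrap.
static void PrintCaspOperands(unsigned rs, unsigned rt, bool is_64bit) {
  const char bank = is_64bit ? 'x' : 'w';
  std::printf("%c%u, %c%u, %c%u, %c%u",
              bank, rs, bank, rs + 1, bank, rt, bank, rt + 1);
}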
@@ -1898,15 +1899,15 @@ void Disassembler::VisitFPImmediate(const Instruction *instr) {
switch (instr->Mask(FPImmediateMask)) {
case FMOV_h_imm:
mnemonic = "fmov";
- form = "'Hd, 'IFPHalf";
+ form = "'Hd, 'IFP";
break;
case FMOV_s_imm:
mnemonic = "fmov";
- form = "'Sd, 'IFPSingle";
+ form = "'Sd, 'IFP";
break;
case FMOV_d_imm:
mnemonic = "fmov";
- form = "'Dd, 'IFPDouble";
+ form = "'Dd, 'IFP";
break;
default:
VIXL_UNREACHABLE();
@@ -3409,7 +3410,7 @@ void Disassembler::VisitNEONCopy(const Instruction *instr) {
} else if (instr->Mask(NEONCopySmovMask) == NEON_SMOV) {
mnemonic = "smov";
nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
- form = "'Rdq, 'Vn.%s['IVInsIndex1]";
+ form = "'R30d, 'Vn.%s['IVInsIndex1]";
} else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
mnemonic = "dup";
form = "'Vd.%s, 'Vn.%s['IVInsIndex1]";
@@ -4006,19 +4007,16 @@ void Disassembler::VisitNEONModifiedImmediate(const Instruction *instr) {
}
} else { // cmode<0> == '1'
mnemonic = "fmov";
+ form = "'Vt.%s, 'IFPNeon";
if (half_enc == 1) {
- form = "'Vt.%s, 'IVMIImmFPHalf";
nfd.SetFormatMap(0, &map_h);
} else if (op == 0) {
- form = "'Vt.%s, 'IVMIImmFPSingle";
nfd.SetFormatMap(0, &map_s);
+ } else if (q == 1) {
+ form = "'Vt.2d, 'IFPNeon";
} else {
- if (q == 1) {
- form = "'Vt.2d, 'IVMIImmFPDouble";
- } else {
- mnemonic = "unallocated";
- form = "(NEONModifiedImmediate)";
- }
+ mnemonic = "unallocated";
+ form = "(NEONModifiedImmediate)";
}
}
}
@@ -4926,6 +4924,4582 @@ void Disassembler::VisitNEONPerm(const Instruction *instr) {
Format(instr, mnemonic, nfd.Substitute(form));
}
+void Disassembler::
+ VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #1]";
+
+ switch (instr->Mask(
+ SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) {
+ case LD1H_z_p_bz_s_x32_scaled:
+ mnemonic = "ld1h";
+ break;
+ case LD1SH_z_p_bz_s_x32_scaled:
+ mnemonic = "ld1sh";
+ break;
+ case LDFF1H_z_p_bz_s_x32_scaled:
+ mnemonic = "ldff1h";
+ break;
+ case LDFF1SH_z_p_bz_s_x32_scaled:
+ mnemonic = "ldff1sh";
+ break;
+ default:
+ form = "(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
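
Each visitor in this block pairs a mnemonic with a form string whose '-prefixed directives Format() expands from instruction fields. A rough sketch of that substitution idea, with simplified directive handling and caller-supplied field values standing in for the real substitution table:

#include <cstring>
#include <string>

// Expand a tiny subset of directives ('Zt, 'Pgl, 'Xns) from pre-extracted
// field values; literal characters are copied through unchanged.
static std::string Substitute(const char* form, unsigned zt, unsigned pg, unsigned xn) {
  std::string out;
  for (const char* c = form; *c != '\0'; ++c) {
    if (*c != '\'') { out += *c; continue; }
    ++c;  // Step past the quote to the directive name.
    if (std::strncmp(c, "Zt", 2) == 0) {
      out += "z" + std::to_string(zt); c += 1;
    } else if (std::strncmp(c, "Pgl", 3) == 0) {
      out += "p" + std::to_string(pg); c += 2;
    } else if (std::strncmp(c, "Xns", 3) == 0) {
      out += (xn == 31) ? std::string("sp") : "x" + std::to_string(xn); c += 2;
    }
  }
  return out;
}

// Substitute("{'Zt.s}, 'Pgl/z, ['Xns]", 3, 2, 31) -> "{z3.s}, p2/z, [sp]"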
+
+void Disassembler::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #2]";
+
+ switch (
+ instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) {
+ case LD1W_z_p_bz_s_x32_scaled:
+ mnemonic = "ld1w";
+ break;
+ case LDFF1W_z_p_bz_s_x32_scaled:
+ mnemonic = "ldff1w";
+ break;
+ default:
+ form = "(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets(
+ const Instruction *instr) {
+ const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw]";
+
+ const char *mnemonic = "unimplemented";
+ switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) {
+ case LD1B_z_p_bz_s_x32_unscaled:
+ mnemonic = "ld1b";
+ break;
+ case LD1H_z_p_bz_s_x32_unscaled:
+ mnemonic = "ld1h";
+ break;
+ case LD1SB_z_p_bz_s_x32_unscaled:
+ mnemonic = "ld1sb";
+ break;
+ case LD1SH_z_p_bz_s_x32_unscaled:
+ mnemonic = "ld1sh";
+ break;
+ case LD1W_z_p_bz_s_x32_unscaled:
+ mnemonic = "ld1w";
+ break;
+ case LDFF1B_z_p_bz_s_x32_unscaled:
+ mnemonic = "ldff1b";
+ break;
+ case LDFF1H_z_p_bz_s_x32_unscaled:
+ mnemonic = "ldff1h";
+ break;
+ case LDFF1SB_z_p_bz_s_x32_unscaled:
+ mnemonic = "ldff1sb";
+ break;
+ case LDFF1SH_z_p_bz_s_x32_unscaled:
+ mnemonic = "ldff1sh";
+ break;
+ case LDFF1W_z_p_bz_s_x32_unscaled:
+ mnemonic = "ldff1w";
+ break;
+ default:
+ form = "(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE32BitGatherLoad_VectorPlusImm(
+ const Instruction *instr) {
+ const char *form = "{'Zt.s}, 'Pgl/z, ['Zn.s]";
+ const char *form_imm_b = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016]";
+ const char *form_imm_h = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*2]";
+ const char *form_imm_w = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*4]";
+ const char *form_imm;
+
+ const char *mnemonic = "unimplemented";
+ switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) {
+ case LD1B_z_p_ai_s:
+ mnemonic = "ld1b";
+ form_imm = form_imm_b;
+ break;
+ case LD1H_z_p_ai_s:
+ mnemonic = "ld1h";
+ form_imm = form_imm_h;
+ break;
+ case LD1SB_z_p_ai_s:
+ mnemonic = "ld1sb";
+ form_imm = form_imm_b;
+ break;
+ case LD1SH_z_p_ai_s:
+ mnemonic = "ld1sh";
+ form_imm = form_imm_h;
+ break;
+ case LD1W_z_p_ai_s:
+ mnemonic = "ld1w";
+ form_imm = form_imm_w;
+ break;
+ case LDFF1B_z_p_ai_s:
+ mnemonic = "ldff1b";
+ form_imm = form_imm_b;
+ break;
+ case LDFF1H_z_p_ai_s:
+ mnemonic = "ldff1h";
+ form_imm = form_imm_h;
+ break;
+ case LDFF1SB_z_p_ai_s:
+ mnemonic = "ldff1sb";
+ form_imm = form_imm_b;
+ break;
+ case LDFF1SH_z_p_ai_s:
+ mnemonic = "ldff1sh";
+ form_imm = form_imm_h;
+ break;
+ case LDFF1W_z_p_ai_s:
+ mnemonic = "ldff1w";
+ form_imm = form_imm_w;
+ break;
+ default:
+ form = "(SVE32BitGatherLoad_VectorPlusImm)";
+ form_imm = form;
+ break;
+ }
+ if (instr->ExtractBits(20, 16) != 0) form = form_imm;
+
+ Format(instr, mnemonic, form);
+}
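
The vector-plus-immediate forms above only take effect when bits 20:16 are non-zero, and their "#'u2016", "*2", "*4" directives print that field scaled by the memory access size. A small sketch of recovering the byte offset under that reading (not VIXL code):

#include <cstdint>

// imm5 lives in bits 20:16 and is scaled by the access size, so for example
// ld1h uses imm5 * 2 bytes and ld1w uses imm5 * 4 bytes.
static uint64_t GatherImmByteOffset(uint32_t instr, unsigned access_size_log2) {
  uint64_t imm5 = (instr >> 16) & 0x1f;
  return imm5 << access_size_log2;
}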
+
+void Disassembler::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.s, '?22:suxtw";
+ const char *suffix = NULL;
+
+ switch (
+ instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) {
+ case PRFB_i_p_bz_s_x32_scaled:
+ mnemonic = "prfb";
+ suffix = "]";
+ break;
+ case PRFD_i_p_bz_s_x32_scaled:
+ mnemonic = "prfd";
+ suffix = " #3]";
+ break;
+ case PRFH_i_p_bz_s_x32_scaled:
+ mnemonic = "prfh";
+ suffix = " #1]";
+ break;
+ case PRFW_i_p_bz_s_x32_scaled:
+ mnemonic = "prfw";
+ suffix = " #2]";
+ break;
+ default:
+ form = "(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVE32BitGatherPrefetch_VectorPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = (instr->ExtractBits(20, 16) != 0)
+ ? "'prefSVEOp, 'Pgl, ['Zn.s, #'u2016]"
+ : "'prefSVEOp, 'Pgl, ['Zn.s]";
+
+ switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) {
+ case PRFB_i_p_ai_s:
+ mnemonic = "prfb";
+ break;
+ case PRFD_i_p_ai_s:
+ mnemonic = "prfd";
+ break;
+ case PRFH_i_p_ai_s:
+ mnemonic = "prfh";
+ break;
+ case PRFW_i_p_ai_s:
+ mnemonic = "prfw";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw #'u2423]";
+
+ switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) {
+ case ST1H_z_p_bz_s_x32_scaled:
+ mnemonic = "st1h";
+ break;
+ case ST1W_z_p_bz_s_x32_scaled:
+ mnemonic = "st1w";
+ break;
+ default:
+ form = "(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw]";
+
+ switch (
+ instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) {
+ case ST1B_z_p_bz_s_x32_unscaled:
+ mnemonic = "st1b";
+ break;
+ case ST1H_z_p_bz_s_x32_unscaled:
+ mnemonic = "st1h";
+ break;
+ case ST1W_z_p_bz_s_x32_unscaled:
+ mnemonic = "st1w";
+ break;
+ default:
+ form = "(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.s}, 'Pgl, ['Zn.s";
+ const char *suffix = NULL;
+
+ bool is_zero = instr->ExtractBits(20, 16) == 0;
+
+ switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) {
+ case ST1B_z_p_ai_s:
+ mnemonic = "st1b";
+ suffix = is_zero ? "]" : ", #'u2016]";
+ break;
+ case ST1H_z_p_ai_s:
+ mnemonic = "st1h";
+ suffix = is_zero ? "]" : ", #'u2016*2]";
+ break;
+ case ST1W_z_p_ai_s:
+ mnemonic = "st1w";
+ suffix = is_zero ? "]" : ", #'u2016*4]";
+ break;
+ default:
+ form = "(SVE32BitScatterStore_VectorPlusImm)";
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw #'u2423]";
+
+ switch (instr->Mask(
+ SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
+ case LD1D_z_p_bz_d_x32_scaled:
+ mnemonic = "ld1d";
+ break;
+ case LD1H_z_p_bz_d_x32_scaled:
+ mnemonic = "ld1h";
+ break;
+ case LD1SH_z_p_bz_d_x32_scaled:
+ mnemonic = "ld1sh";
+ break;
+ case LD1SW_z_p_bz_d_x32_scaled:
+ mnemonic = "ld1sw";
+ break;
+ case LD1W_z_p_bz_d_x32_scaled:
+ mnemonic = "ld1w";
+ break;
+ case LDFF1D_z_p_bz_d_x32_scaled:
+ mnemonic = "ldff1d";
+ break;
+ case LDFF1H_z_p_bz_d_x32_scaled:
+ mnemonic = "ldff1h";
+ break;
+ case LDFF1SH_z_p_bz_d_x32_scaled:
+ mnemonic = "ldff1sh";
+ break;
+ case LDFF1SW_z_p_bz_d_x32_scaled:
+ mnemonic = "ldff1sw";
+ break;
+ case LDFF1W_z_p_bz_d_x32_scaled:
+ mnemonic = "ldff1w";
+ break;
+ default:
+      form = "(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, lsl #'u2423]";
+
+ switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) {
+ case LD1D_z_p_bz_d_64_scaled:
+ mnemonic = "ld1d";
+ break;
+ case LD1H_z_p_bz_d_64_scaled:
+ mnemonic = "ld1h";
+ break;
+ case LD1SH_z_p_bz_d_64_scaled:
+ mnemonic = "ld1sh";
+ break;
+ case LD1SW_z_p_bz_d_64_scaled:
+ mnemonic = "ld1sw";
+ break;
+ case LD1W_z_p_bz_d_64_scaled:
+ mnemonic = "ld1w";
+ break;
+ case LDFF1D_z_p_bz_d_64_scaled:
+ mnemonic = "ldff1d";
+ break;
+ case LDFF1H_z_p_bz_d_64_scaled:
+ mnemonic = "ldff1h";
+ break;
+ case LDFF1SH_z_p_bz_d_64_scaled:
+ mnemonic = "ldff1sh";
+ break;
+ case LDFF1SW_z_p_bz_d_64_scaled:
+ mnemonic = "ldff1sw";
+ break;
+ case LDFF1W_z_p_bz_d_64_scaled:
+ mnemonic = "ldff1w";
+ break;
+ default:
+      form = "(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d]";
+
+ switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) {
+ case LD1B_z_p_bz_d_64_unscaled:
+ mnemonic = "ld1b";
+ break;
+ case LD1D_z_p_bz_d_64_unscaled:
+ mnemonic = "ld1d";
+ break;
+ case LD1H_z_p_bz_d_64_unscaled:
+ mnemonic = "ld1h";
+ break;
+ case LD1SB_z_p_bz_d_64_unscaled:
+ mnemonic = "ld1sb";
+ break;
+ case LD1SH_z_p_bz_d_64_unscaled:
+ mnemonic = "ld1sh";
+ break;
+ case LD1SW_z_p_bz_d_64_unscaled:
+ mnemonic = "ld1sw";
+ break;
+ case LD1W_z_p_bz_d_64_unscaled:
+ mnemonic = "ld1w";
+ break;
+ case LDFF1B_z_p_bz_d_64_unscaled:
+ mnemonic = "ldff1b";
+ break;
+ case LDFF1D_z_p_bz_d_64_unscaled:
+ mnemonic = "ldff1d";
+ break;
+ case LDFF1H_z_p_bz_d_64_unscaled:
+ mnemonic = "ldff1h";
+ break;
+ case LDFF1SB_z_p_bz_d_64_unscaled:
+ mnemonic = "ldff1sb";
+ break;
+ case LDFF1SH_z_p_bz_d_64_unscaled:
+ mnemonic = "ldff1sh";
+ break;
+ case LDFF1SW_z_p_bz_d_64_unscaled:
+ mnemonic = "ldff1sw";
+ break;
+ case LDFF1W_z_p_bz_d_64_unscaled:
+ mnemonic = "ldff1w";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw]";
+
+ switch (instr->Mask(
+ SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
+ case LD1B_z_p_bz_d_x32_unscaled:
+ mnemonic = "ld1b";
+ break;
+ case LD1D_z_p_bz_d_x32_unscaled:
+ mnemonic = "ld1d";
+ break;
+ case LD1H_z_p_bz_d_x32_unscaled:
+ mnemonic = "ld1h";
+ break;
+ case LD1SB_z_p_bz_d_x32_unscaled:
+ mnemonic = "ld1sb";
+ break;
+ case LD1SH_z_p_bz_d_x32_unscaled:
+ mnemonic = "ld1sh";
+ break;
+ case LD1SW_z_p_bz_d_x32_unscaled:
+ mnemonic = "ld1sw";
+ break;
+ case LD1W_z_p_bz_d_x32_unscaled:
+ mnemonic = "ld1w";
+ break;
+ case LDFF1B_z_p_bz_d_x32_unscaled:
+ mnemonic = "ldff1b";
+ break;
+ case LDFF1D_z_p_bz_d_x32_unscaled:
+ mnemonic = "ldff1d";
+ break;
+ case LDFF1H_z_p_bz_d_x32_unscaled:
+ mnemonic = "ldff1h";
+ break;
+ case LDFF1SB_z_p_bz_d_x32_unscaled:
+ mnemonic = "ldff1sb";
+ break;
+ case LDFF1SH_z_p_bz_d_x32_unscaled:
+ mnemonic = "ldff1sh";
+ break;
+ case LDFF1SW_z_p_bz_d_x32_unscaled:
+ mnemonic = "ldff1sw";
+ break;
+ case LDFF1W_z_p_bz_d_x32_unscaled:
+ mnemonic = "ldff1w";
+ break;
+ default:
+      form = "(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE64BitGatherLoad_VectorPlusImm(
+ const Instruction *instr) {
+ const char *form = "{'Zt.d}, 'Pgl/z, ['Zn.d]";
+ const char *form_imm[4] = {"{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016]",
+ "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*2]",
+ "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*4]",
+ "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*8]"};
+
+ if (instr->ExtractBits(20, 16) != 0) {
+ unsigned msz = instr->ExtractBits(24, 23);
+ bool sign_extend = instr->ExtractBit(14) == 0;
+ if ((msz == kDRegSizeInBytesLog2) && sign_extend) {
+ form = "(SVE64BitGatherLoad_VectorPlusImm)";
+ } else {
+ VIXL_ASSERT(msz < ArrayLength(form_imm));
+ form = form_imm[msz];
+ }
+ }
+
+ const char *mnemonic = "unimplemented";
+ switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) {
+ case LD1B_z_p_ai_d:
+ mnemonic = "ld1b";
+ break;
+ case LD1D_z_p_ai_d:
+ mnemonic = "ld1d";
+ break;
+ case LD1H_z_p_ai_d:
+ mnemonic = "ld1h";
+ break;
+ case LD1SB_z_p_ai_d:
+ mnemonic = "ld1sb";
+ break;
+ case LD1SH_z_p_ai_d:
+ mnemonic = "ld1sh";
+ break;
+ case LD1SW_z_p_ai_d:
+ mnemonic = "ld1sw";
+ break;
+ case LD1W_z_p_ai_d:
+ mnemonic = "ld1w";
+ break;
+ case LDFF1B_z_p_ai_d:
+ mnemonic = "ldff1b";
+ break;
+ case LDFF1D_z_p_ai_d:
+ mnemonic = "ldff1d";
+ break;
+ case LDFF1H_z_p_ai_d:
+ mnemonic = "ldff1h";
+ break;
+ case LDFF1SB_z_p_ai_d:
+ mnemonic = "ldff1sb";
+ break;
+ case LDFF1SH_z_p_ai_d:
+ mnemonic = "ldff1sh";
+ break;
+ case LDFF1SW_z_p_ai_d:
+ mnemonic = "ldff1sw";
+ break;
+ case LDFF1W_z_p_ai_d:
+ mnemonic = "ldff1w";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets)";
+
+ switch (
+ instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) {
+ case PRFB_i_p_bz_d_64_scaled:
+ mnemonic = "prfb";
+ form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d]";
+ break;
+ case PRFD_i_p_bz_d_64_scaled:
+ mnemonic = "prfd";
+ form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #3]";
+ break;
+ case PRFH_i_p_bz_d_64_scaled:
+ mnemonic = "prfh";
+ form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #1]";
+ break;
+ case PRFW_i_p_bz_d_64_scaled:
+ mnemonic = "prfw";
+ form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #2]";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::
+ VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, '?22:suxtw";
+ const char *suffix = NULL;
+
+ switch (instr->Mask(
+ SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
+ case PRFB_i_p_bz_d_x32_scaled:
+ mnemonic = "prfb";
+      suffix = "]";
+ break;
+ case PRFD_i_p_bz_d_x32_scaled:
+ mnemonic = "prfd";
+ suffix = " #3]";
+ break;
+ case PRFH_i_p_bz_d_x32_scaled:
+ mnemonic = "prfh";
+ suffix = " #1]";
+ break;
+ case PRFW_i_p_bz_d_x32_scaled:
+ mnemonic = "prfw";
+ suffix = " #2]";
+ break;
+ default:
+ form = "(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVE64BitGatherPrefetch_VectorPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = (instr->ExtractBits(20, 16) != 0)
+ ? "'prefSVEOp, 'Pgl, ['Zn.d, #'u2016]"
+ : "'prefSVEOp, 'Pgl, ['Zn.d]";
+
+ switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) {
+ case PRFB_i_p_ai_d:
+ mnemonic = "prfb";
+ break;
+ case PRFD_i_p_ai_d:
+ mnemonic = "prfd";
+ break;
+ case PRFH_i_p_ai_d:
+ mnemonic = "prfh";
+ break;
+ case PRFW_i_p_ai_d:
+ mnemonic = "prfw";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, lsl #'u2423]";
+
+ switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) {
+ case ST1D_z_p_bz_d_64_scaled:
+ mnemonic = "st1d";
+ break;
+ case ST1H_z_p_bz_d_64_scaled:
+ mnemonic = "st1h";
+ break;
+ case ST1W_z_p_bz_d_64_scaled:
+ mnemonic = "st1w";
+ break;
+ default:
+ form = "(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d]";
+
+ switch (
+ instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) {
+ case ST1B_z_p_bz_d_64_unscaled:
+ mnemonic = "st1b";
+ break;
+ case ST1D_z_p_bz_d_64_unscaled:
+ mnemonic = "st1d";
+ break;
+ case ST1H_z_p_bz_d_64_unscaled:
+ mnemonic = "st1h";
+ break;
+ case ST1W_z_p_bz_d_64_unscaled:
+ mnemonic = "st1w";
+ break;
+ default:
+      form = "(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::
+ VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw #'u2423]";
+
+ switch (instr->Mask(
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
+ case ST1D_z_p_bz_d_x32_scaled:
+ mnemonic = "st1d";
+ break;
+ case ST1H_z_p_bz_d_x32_scaled:
+ mnemonic = "st1h";
+ break;
+ case ST1W_z_p_bz_d_x32_scaled:
+ mnemonic = "st1w";
+ break;
+ default:
+ form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::
+ VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw]";
+
+ switch (instr->Mask(
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
+ case ST1B_z_p_bz_d_x32_unscaled:
+ mnemonic = "st1b";
+ break;
+ case ST1D_z_p_bz_d_x32_unscaled:
+ mnemonic = "st1d";
+ break;
+ case ST1H_z_p_bz_d_x32_unscaled:
+ mnemonic = "st1h";
+ break;
+ case ST1W_z_p_bz_d_x32_unscaled:
+ mnemonic = "st1w";
+ break;
+ default:
+ form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVE64BitScatterStore_VectorPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.d}, 'Pgl, ['Zn.d";
+ const char *suffix = NULL;
+
+ bool is_zero = instr->ExtractBits(20, 16) == 0;
+
+ switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) {
+ case ST1B_z_p_ai_d:
+ mnemonic = "st1b";
+ suffix = is_zero ? "]" : ", #'u2016]";
+ break;
+ case ST1D_z_p_ai_d:
+ mnemonic = "st1d";
+ suffix = is_zero ? "]" : ", #'u2016*8]";
+ break;
+ case ST1H_z_p_ai_d:
+ mnemonic = "st1h";
+ suffix = is_zero ? "]" : ", #'u2016*2]";
+ break;
+ case ST1W_z_p_ai_d:
+ mnemonic = "st1w";
+ suffix = is_zero ? "]" : ", #'u2016*4]";
+ break;
+ default:
+ form = "(SVE64BitScatterStore_VectorPlusImm)";
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVEBitwiseLogicalWithImm_Unpredicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'tl, 'Zd.'tl, 'ITriSvel";
+
+ if (instr->GetSVEImmLogical() == 0) {
+ // The immediate encoded in the instruction is not in the expected format.
+ Format(instr, "unallocated", "(SVEBitwiseImm)");
+ return;
+ }
+
+ switch (instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) {
+ case AND_z_zi:
+ mnemonic = "and";
+ break;
+ case EOR_z_zi:
+ mnemonic = "eor";
+ break;
+ case ORR_z_zi:
+ mnemonic = "orr";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEBitwiseLogical_Predicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
+
+ switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) {
+ case AND_z_p_zz:
+ mnemonic = "and";
+ break;
+ case BIC_z_p_zz:
+ mnemonic = "bic";
+ break;
+ case EOR_z_p_zz:
+ mnemonic = "eor";
+ break;
+ case ORR_z_p_zz:
+ mnemonic = "orr";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEBitwiseShiftByImm_Predicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'tszp, 'Pgl/m, 'Zd.'tszp, 'ITriSveq";
+ unsigned tsize = (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(9, 8);
+
+ if (tsize == 0) {
+ form = "(SVEBitwiseShiftByImm_Predicated)";
+ } else {
+ switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
+ case ASRD_z_p_zi:
+ mnemonic = "asrd";
+ break;
+ case ASR_z_p_zi:
+ mnemonic = "asr";
+ break;
+ case LSL_z_p_zi:
+ mnemonic = "lsl";
+ form = "'Zd.'tszp, p'u1210/m, 'Zd.'tszp, 'ITriSvep";
+ break;
+ case LSR_z_p_zi:
+ mnemonic = "lsr";
+ break;
+ default:
+ break;
+ }
+ }
+ Format(instr, mnemonic, form);
+}
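
The tsize value built from bits 23:22 and 9:8 is assumed here to follow the NEON-style shift-immediate layout that SVE reuses: the highest set bit of tszh:tszl gives the element size, and right-shift amounts are encoded as 2*esize minus the combined tszh:tszl:imm3 value (left shifts count up from esize instead). A sketch under that assumption, for the right-shift case only:

#include <cstdint>

struct SveShiftImm { unsigned esize_bits; unsigned shift; };

// Decode an asr/lsr/asrd-style immediate; tsize must be non-zero, which the
// visitor above has already checked.
static SveShiftImm DecodeSveRightShiftImm(uint32_t instr) {
  unsigned tsize = (((instr >> 22) & 0x3) << 2) | ((instr >> 8) & 0x3);  // tszh:tszl.
  unsigned imm3 = (instr >> 5) & 0x7;
  unsigned size_log2 = 0;
  for (unsigned t = tsize; t > 1; t >>= 1) ++size_log2;  // Highest set bit.
  unsigned esize = 8u << size_log2;
  unsigned combined = (tsize << 3) | imm3;                // tszh:tszl:imm3.
  return SveShiftImm{esize, 2 * esize - combined};
}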
+
+void Disassembler::VisitSVEBitwiseShiftByVector_Predicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
+
+ switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) {
+ case ASRR_z_p_zz:
+ mnemonic = "asrr";
+ break;
+ case ASR_z_p_zz:
+ mnemonic = "asr";
+ break;
+ case LSLR_z_p_zz:
+ mnemonic = "lslr";
+ break;
+ case LSL_z_p_zz:
+ mnemonic = "lsl";
+ break;
+ case LSRR_z_p_zz:
+ mnemonic = "lsrr";
+ break;
+ case LSR_z_p_zz:
+ mnemonic = "lsr";
+ break;
+ default:
+ form = "(SVEBitwiseShiftByVector_Predicated)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEBitwiseShiftByWideElements_Predicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.d";
+
+ if (instr->GetSVESize() == kDRegSizeInBytesLog2) {
+ form = "(SVEBitwiseShiftByWideElements_Predicated)";
+ } else {
+ switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) {
+ case ASR_z_p_zw:
+ mnemonic = "asr";
+ break;
+ case LSL_z_p_zw:
+ mnemonic = "lsl";
+ break;
+ case LSR_z_p_zw:
+ mnemonic = "lsr";
+ break;
+ default:
+ form = "(SVEBitwiseShiftByWideElements_Predicated)";
+ break;
+ }
+ }
+ Format(instr, mnemonic, form);
+}
+
+static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) {
+ VIXL_ASSERT(IsUintN(8 << lane_bytes_log2, value));
+
+ // Duplicate lane-sized value across double word.
+ switch (lane_bytes_log2) {
+ case 0:
+ value *= 0x0101010101010101;
+ break;
+ case 1:
+ value *= 0x0001000100010001;
+ break;
+ case 2:
+ value *= 0x0000000100000001;
+ break;
+ case 3: // Nothing to do
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ if ((value & 0xff) == 0) {
+ // Check for 16-bit patterns. Set least-significant 16 bits, to make tests
+ // easier; we already checked least-significant byte is zero above.
+ uint64_t generic_value = value | 0xffff;
+
+ // Check 0x00000000_0000pq00 or 0xffffffff_ffffpq00.
+ if ((generic_value == 0xffff) || (generic_value == UINT64_MAX)) {
+ return false;
+ }
+
+ // Check 0x0000pq00_0000pq00 or 0xffffpq00_ffffpq00.
+ uint64_t rotvalue = RotateRight(value, 32, 64);
+ if (value == rotvalue) {
+ generic_value &= 0xffffffff;
+ if ((generic_value == 0xffff) || (generic_value == UINT32_MAX)) {
+ return false;
+ }
+ }
+
+ // Check 0xpq00pq00_pq00pq00.
+ rotvalue = RotateRight(value, 16, 64);
+ if (value == rotvalue) {
+ return false;
+ }
+ } else {
+ // Check for 8-bit patterns. Set least-significant byte, to make tests
+ // easier.
+ uint64_t generic_value = value | 0xff;
+
+ // Check 0x00000000_000000pq or 0xffffffff_ffffffpq.
+ if ((generic_value == 0xff) || (generic_value == UINT64_MAX)) {
+ return false;
+ }
+
+ // Check 0x000000pq_000000pq or 0xffffffpq_ffffffpq.
+ uint64_t rotvalue = RotateRight(value, 32, 64);
+ if (value == rotvalue) {
+ generic_value &= 0xffffffff;
+ if ((generic_value == 0xff) || (generic_value == UINT32_MAX)) {
+ return false;
+ }
+ }
+
+ // Check 0x00pq00pq_00pq00pq or 0xffpqffpq_ffpqffpq.
+ rotvalue = RotateRight(value, 16, 64);
+ if (value == rotvalue) {
+ generic_value &= 0xffff;
+ if ((generic_value == 0xff) || (generic_value == UINT16_MAX)) {
+ return false;
+ }
+ }
+
+ // Check 0xpqpqpqpq_pqpqpqpq.
+ rotvalue = RotateRight(value, 8, 64);
+ if (value == rotvalue) {
+ return false;
+ }
+ }
+ return true;
+}
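
SVEMoveMaskPreferred() decides whether a DUPM immediate is printed through its "mov" alias: the lane value is first replicated across a doubleword, then values that repeat with a small period, or that sit in an otherwise all-zeros or all-ones doubleword, are rejected because simpler MOV encodings already cover them. The sketch below restates only the core rotation trick under that reading; it is not the predicate above.

#include <cstdint>

static inline uint64_t Ror64(uint64_t v, unsigned n) {
  return (n == 0) ? v : ((v >> n) | (v << (64 - n)));
}

// A 64-bit pattern with period p bits is unchanged by a rotation of p bits,
// which is how the checks above detect 8-, 16- and 32-bit repeats.
static bool RepeatsWithPeriod(uint64_t replicated, unsigned period_bits) {
  return Ror64(replicated, period_bits) == replicated;
}

// Example: RepeatsWithPeriod(0x00ff00ff00ff00ffu, 16) is true.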
+
+void Disassembler::VisitSVEBroadcastBitmaskImm(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEBroadcastBitmaskImm)";
+
+ switch (instr->Mask(SVEBroadcastBitmaskImmMask)) {
+ case DUPM_z_i: {
+ uint64_t imm = instr->GetSVEImmLogical();
+ if (imm != 0) {
+ int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
+ mnemonic = SVEMoveMaskPreferred(imm, lane_size) ? "mov" : "dupm";
+ form = "'Zd.'tl, 'ITriSvel";
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEBroadcastFPImm_Unpredicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEBroadcastFPImm_Unpredicated)";
+
+ switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) {
+ case FDUP_z_i:
+ // The preferred disassembly for fdup is "fmov".
+ mnemonic = "fmov";
+ form = "'Zd.'t, 'IFPSve";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEBroadcastGeneralRegister(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEBroadcastGeneralRegister)";
+
+ switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) {
+ case DUP_z_r:
+ // The preferred disassembly for dup is "mov".
+ mnemonic = "mov";
+ if (instr->GetSVESize() == kDRegSizeInBytesLog2) {
+ form = "'Zd.'t, 'Xns";
+ } else {
+ form = "'Zd.'t, 'Wns";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEBroadcastIndexElement(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEBroadcastIndexElement)";
+
+ switch (instr->Mask(SVEBroadcastIndexElementMask)) {
+ case DUP_z_zi: {
+ // The tsz field must not be zero.
+ int tsz = instr->ExtractBits(20, 16);
+ if (tsz != 0) {
+ // The preferred disassembly for dup is "mov".
+ mnemonic = "mov";
+ int imm2 = instr->ExtractBits(23, 22);
+ if ((CountSetBits(imm2) + CountSetBits(tsz)) == 1) {
+ // If imm2:tsz has one set bit, the index is zero. This is
+ // disassembled as a mov from a b/h/s/d/q scalar register.
+ form = "'Zd.'tszx, 'tszx'u0905";
+ } else {
+ form = "'Zd.'tszx, 'Zn.'tszx['IVInsSVEIndex]";
+ }
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
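
The index and the element size share the imm2:tsz field here: the lowest set bit of tsz selects the lane size and the bits above it hold the index, which is why a single set bit across imm2:tsz means index zero (the scalar-register form of the mov alias). A sketch of that unpacking, assuming the usual SVE DUP (indexed) layout:

#include <cstdint>

struct DupIndexFields { unsigned lane_size_log2; unsigned index; };

// Caller must have rejected tsz == 0 (as the visitor above does), otherwise
// the loop below would not terminate.
static DupIndexFields DecodeDupIndex(uint32_t instr) {
  unsigned tsz = (instr >> 16) & 0x1f;   // Bits 20:16.
  unsigned imm2 = (instr >> 22) & 0x3;   // Bits 23:22.
  unsigned imm7 = (imm2 << 5) | tsz;     // imm2:tsz, seven bits.
  unsigned lane_size_log2 = 0;           // 0 = B, 1 = H, 2 = S, 3 = D, 4 = Q.
  while (((tsz >> lane_size_log2) & 1) == 0) ++lane_size_log2;
  return DupIndexFields{lane_size_log2, imm7 >> (lane_size_log2 + 1)};
}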
+
+void Disassembler::VisitSVEBroadcastIntImm_Unpredicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEBroadcastIntImm_Unpredicated)";
+
+ switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) {
+ case DUP_z_i:
+ // The encoding of byte-sized lanes with lsl #8 is undefined.
+ if ((instr->GetSVEVectorFormat() == kFormatVnB) &&
+ (instr->ExtractBit(13) == 1))
+ break;
+
+ // The preferred disassembly for dup is "mov".
+ mnemonic = "mov";
+ form = (instr->ExtractBit(13) == 0) ? "'Zd.'t, #'s1205"
+ : "'Zd.'t, #'s1205, lsl #8";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVECompressActiveElements(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVECompressActiveElements)";
+
+ switch (instr->Mask(SVECompressActiveElementsMask)) {
+ case COMPACT_z_p_z:
+ // The top bit of size is always set for compact, so 't can only be
+ // substituted with types S and D.
+ VIXL_ASSERT(instr->ExtractBit(23) == 1);
+ mnemonic = "compact";
+ form = "'Zd.'t, 'Pgl, 'Zn.'t";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEConditionallyBroadcastElementToVector(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t";
+
+ switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) {
+ case CLASTA_z_p_zz:
+ mnemonic = "clasta";
+ break;
+ case CLASTB_z_p_zz:
+ mnemonic = "clastb";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEConditionallyExtractElementToGeneralRegister(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Wd, 'Pgl, 'Wd, 'Zn.'t";
+
+ if (instr->GetSVESize() == kDRegSizeInBytesLog2) {
+ form = "'Xd, p'u1210, 'Xd, 'Zn.'t";
+ }
+
+ switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) {
+ case CLASTA_r_p_z:
+ mnemonic = "clasta";
+ break;
+ case CLASTB_r_p_z:
+ mnemonic = "clastb";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEConditionallyExtractElementToSIMDFPScalar(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t";
+
+ switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) {
+ case CLASTA_v_p_z:
+ mnemonic = "clasta";
+ break;
+ case CLASTB_v_p_z:
+ mnemonic = "clastb";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEConditionallyTerminateScalars(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = (instr->ExtractBit(22) == 0) ? "'Wn, 'Wm" : "'Xn, 'Xm";
+
+ switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) {
+ case CTERMEQ_rr:
+ mnemonic = "ctermeq";
+ break;
+ case CTERMNE_rr:
+ mnemonic = "ctermne";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEConstructivePrefix_Unpredicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEConstructivePrefix_Unpredicated)";
+
+ switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) {
+ case MOVPRFX_z_z:
+ mnemonic = "movprfx";
+ form = "'Zd, 'Zn";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+
+ bool rm_is_zr = instr->GetRm() == kZeroRegCode;
+
+ const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns";
+ const char *suffix = NULL;
+
+ switch (instr->Mask(SVEContiguousFirstFaultLoad_ScalarPlusScalarMask)) {
+ case LDFF1B_z_p_br_u16:
+ case LDFF1B_z_p_br_u32:
+ case LDFF1B_z_p_br_u64:
+ case LDFF1B_z_p_br_u8:
+ mnemonic = "ldff1b";
+ suffix = rm_is_zr ? "]" : ", 'Xm]";
+ break;
+ case LDFF1D_z_p_br_u64:
+ mnemonic = "ldff1d";
+ suffix = rm_is_zr ? "]" : ", 'Xm, lsl #3]";
+ break;
+ case LDFF1H_z_p_br_u16:
+ case LDFF1H_z_p_br_u32:
+ case LDFF1H_z_p_br_u64:
+ mnemonic = "ldff1h";
+ suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]";
+ break;
+ case LDFF1SB_z_p_br_s16:
+ case LDFF1SB_z_p_br_s32:
+ case LDFF1SB_z_p_br_s64:
+ mnemonic = "ldff1sb";
+ suffix = rm_is_zr ? "]" : ", 'Xm]";
+ break;
+ case LDFF1SH_z_p_br_s32:
+ case LDFF1SH_z_p_br_s64:
+ mnemonic = "ldff1sh";
+ suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]";
+ break;
+ case LDFF1SW_z_p_br_s64:
+ mnemonic = "ldff1sw";
+ suffix = rm_is_zr ? "]" : ", 'Xm, lsl #2]";
+ break;
+ case LDFF1W_z_p_br_u32:
+ case LDFF1W_z_p_br_u64:
+ mnemonic = "ldff1w";
+ suffix = rm_is_zr ? "]" : ", 'Xm, lsl #2]";
+ break;
+ default:
+ form = "(SVEContiguousFirstFaultLoad_ScalarPlusScalar)";
+ break;
+ }
+
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVEContiguousNonFaultLoad_ScalarPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns";
+ const char *suffix =
+ (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]";
+
+ switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) {
+ case LDNF1B_z_p_bi_u16:
+ case LDNF1B_z_p_bi_u32:
+ case LDNF1B_z_p_bi_u64:
+ case LDNF1B_z_p_bi_u8:
+ mnemonic = "ldnf1b";
+ break;
+ case LDNF1D_z_p_bi_u64:
+ mnemonic = "ldnf1d";
+ break;
+ case LDNF1H_z_p_bi_u16:
+ case LDNF1H_z_p_bi_u32:
+ case LDNF1H_z_p_bi_u64:
+ mnemonic = "ldnf1h";
+ break;
+ case LDNF1SB_z_p_bi_s16:
+ case LDNF1SB_z_p_bi_s32:
+ case LDNF1SB_z_p_bi_s64:
+ mnemonic = "ldnf1sb";
+ break;
+ case LDNF1SH_z_p_bi_s32:
+ case LDNF1SH_z_p_bi_s64:
+ mnemonic = "ldnf1sh";
+ break;
+ case LDNF1SW_z_p_bi_s64:
+ mnemonic = "ldnf1sw";
+ break;
+ case LDNF1W_z_p_bi_u32:
+ case LDNF1W_z_p_bi_u64:
+ mnemonic = "ldnf1w";
+ break;
+ default:
+ form = "(SVEContiguousNonFaultLoad_ScalarPlusImm)";
+ suffix = NULL;
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusImm)";
+
+ const char *suffix =
+ (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]";
+ switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) {
+ case LDNT1B_z_p_bi_contiguous:
+ mnemonic = "ldnt1b";
+ form = "{'Zt.b}, 'Pgl/z, ['Xns";
+ break;
+ case LDNT1D_z_p_bi_contiguous:
+ mnemonic = "ldnt1d";
+ form = "{'Zt.d}, 'Pgl/z, ['Xns";
+ break;
+ case LDNT1H_z_p_bi_contiguous:
+ mnemonic = "ldnt1h";
+ form = "{'Zt.h}, 'Pgl/z, ['Xns";
+ break;
+ case LDNT1W_z_p_bi_contiguous:
+ mnemonic = "ldnt1w";
+ form = "{'Zt.s}, 'Pgl/z, ['Xns";
+ break;
+ default:
+ suffix = NULL;
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusScalar)";
+
+ switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) {
+ case LDNT1B_z_p_br_contiguous:
+ mnemonic = "ldnt1b";
+ form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]";
+ break;
+ case LDNT1D_z_p_br_contiguous:
+ mnemonic = "ldnt1d";
+ form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]";
+ break;
+ case LDNT1H_z_p_br_contiguous:
+ mnemonic = "ldnt1h";
+ form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]";
+ break;
+ case LDNT1W_z_p_br_contiguous:
+ mnemonic = "ldnt1w";
+ form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEContiguousNonTemporalStore_ScalarPlusImm)";
+
+ const char *suffix =
+ (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]";
+ switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) {
+ case STNT1B_z_p_bi_contiguous:
+ mnemonic = "stnt1b";
+ form = "{'Zt.b}, 'Pgl, ['Xns";
+ break;
+ case STNT1D_z_p_bi_contiguous:
+ mnemonic = "stnt1d";
+ form = "{'Zt.d}, 'Pgl, ['Xns";
+ break;
+ case STNT1H_z_p_bi_contiguous:
+ mnemonic = "stnt1h";
+ form = "{'Zt.h}, 'Pgl, ['Xns";
+ break;
+ case STNT1W_z_p_bi_contiguous:
+ mnemonic = "stnt1w";
+ form = "{'Zt.s}, 'Pgl, ['Xns";
+ break;
+ default:
+ suffix = NULL;
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEContiguousNonTemporalStore_ScalarPlusScalar)";
+
+ switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) {
+ case STNT1B_z_p_br_contiguous:
+ mnemonic = "stnt1b";
+ form = "{'Zt.b}, 'Pgl, ['Xns, 'Rm]";
+ break;
+ case STNT1D_z_p_br_contiguous:
+ mnemonic = "stnt1d";
+ form = "{'Zt.d}, 'Pgl, ['Xns, 'Rm, lsl #3]";
+ break;
+ case STNT1H_z_p_br_contiguous:
+ mnemonic = "stnt1h";
+ form = "{'Zt.h}, 'Pgl, ['Xns, 'Rm, lsl #1]";
+ break;
+ case STNT1W_z_p_br_contiguous:
+ mnemonic = "stnt1w";
+ form = "{'Zt.s}, 'Pgl, ['Xns, 'Rm, lsl #2]";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = (instr->ExtractBits(21, 16) != 0)
+ ? "'prefSVEOp, 'Pgl, ['Xns, #'s2116, mul vl]"
+ : "'prefSVEOp, 'Pgl, ['Xns]";
+
+ switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) {
+ case PRFB_i_p_bi_s:
+ mnemonic = "prfb";
+ break;
+ case PRFD_i_p_bi_s:
+ mnemonic = "prfd";
+ break;
+ case PRFH_i_p_bi_s:
+ mnemonic = "prfh";
+ break;
+ case PRFW_i_p_bi_s:
+ mnemonic = "prfw";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusScalar(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEContiguousPrefetch_ScalarPlusScalar)";
+
+ if (instr->GetRm() != kZeroRegCode) {
+ switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) {
+ case PRFB_i_p_br_s:
+ mnemonic = "prfb";
+ form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm]";
+ break;
+ case PRFD_i_p_br_s:
+ mnemonic = "prfd";
+ form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #3]";
+ break;
+ case PRFH_i_p_br_s:
+ mnemonic = "prfh";
+ form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #1]";
+ break;
+ case PRFW_i_p_br_s:
+ mnemonic = "prfw";
+ form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #2]";
+ break;
+ default:
+ break;
+ }
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEContiguousStore_ScalarPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+
+ // The 'size' field isn't in the usual place here.
+ const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, #'s1916, mul vl]";
+ if (instr->ExtractBits(19, 16) == 0) {
+ form = "{'Zt.'tls}, 'Pgl, ['Xns]";
+ }
+
+ switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) {
+ case ST1B_z_p_bi:
+ mnemonic = "st1b";
+ break;
+ case ST1D_z_p_bi:
+ mnemonic = "st1d";
+ break;
+ case ST1H_z_p_bi:
+ mnemonic = "st1h";
+ break;
+ case ST1W_z_p_bi:
+ mnemonic = "st1w";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEContiguousStore_ScalarPlusScalar(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+
+ // The 'size' field isn't in the usual place here.
+ const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, 'Xm'NSveS]";
+
+ switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) {
+ case ST1B_z_p_br:
+ mnemonic = "st1b";
+ break;
+ case ST1D_z_p_br:
+ mnemonic = "st1d";
+ break;
+ case ST1H_z_p_br:
+ mnemonic = "st1h";
+ break;
+ case ST1W_z_p_br:
+ mnemonic = "st1w";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVECopyFPImm_Predicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVECopyFPImm_Predicated)";
+
+ switch (instr->Mask(SVECopyFPImm_PredicatedMask)) {
+ case FCPY_z_p_i:
+ // The preferred disassembly for fcpy is "fmov".
+ mnemonic = "fmov";
+ form = "'Zd.'t, 'Pm/m, 'IFPSve";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVECopyGeneralRegisterToVector_Predicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVECopyGeneralRegisterToVector_Predicated)";
+
+ switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) {
+ case CPY_z_p_r:
+ // The preferred disassembly for cpy is "mov".
+ mnemonic = "mov";
+ form = "'Zd.'t, 'Pgl/m, 'Wns";
+ if (instr->GetSVESize() == kXRegSizeInBytesLog2) {
+ form = "'Zd.'t, 'Pgl/m, 'Xns";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVECopyIntImm_Predicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVECopyIntImm_Predicated)";
+ const char *suffix = NULL;
+
+ switch (instr->Mask(SVECopyIntImm_PredicatedMask)) {
+ case CPY_z_p_i: {
+ // The preferred disassembly for cpy is "mov".
+ mnemonic = "mov";
+ form = "'Zd.'t, 'Pm/'?14:mz, #'s1205";
+ if (instr->ExtractBit(13) != 0) suffix = ", lsl #8";
+ break;
+ }
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
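+
+// Illustrative only: the '?14:mz substitution picks merging or zeroing
+// predication from bit 14, and bit 13 adds the optional shift, giving output
+// such as
+//   mov z0.h, p3/m, #-42          // bit 14 set (merging)
+//   mov z0.h, p3/z, #7            // bit 14 clear (zeroing)
+//   mov z0.s, p3/m, #1, lsl #8    // bit 13 set
+// (register numbers and immediates are arbitrary examples).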
+
+void Disassembler::VisitSVECopySIMDFPScalarRegisterToVector_Predicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVECopySIMDFPScalarRegisterToVector_Predicated)";
+
+ switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) {
+ case CPY_z_p_v:
+ // The preferred disassembly for cpy is "mov".
+ mnemonic = "mov";
+ form = "'Zd.'t, 'Pgl/m, 'Vnv";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEExtractElementToGeneralRegister(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Wd, 'Pgl, 'Zn.'t";
+
+ if (instr->GetSVESize() == kDRegSizeInBytesLog2) {
+ form = "'Xd, p'u1210, 'Zn.'t";
+ }
+
+ switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) {
+ case LASTA_r_p_z:
+ mnemonic = "lasta";
+ break;
+ case LASTB_r_p_z:
+ mnemonic = "lastb";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEExtractElementToSIMDFPScalarRegister(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'t'u0400, 'Pgl, 'Zn.'t";
+
+ switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) {
+ case LASTA_v_p_z:
+ mnemonic = "lasta";
+ break;
+ case LASTB_v_p_z:
+ mnemonic = "lastb";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFFRInitialise(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFFRInitialise)";
+
+ switch (instr->Mask(SVEFFRInitialiseMask)) {
+ case SETFFR_f:
+ mnemonic = "setffr";
+ form = " ";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFFRWriteFromPredicate(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFFRWriteFromPredicate)";
+
+ switch (instr->Mask(SVEFFRWriteFromPredicateMask)) {
+ case WRFFR_f_p:
+ mnemonic = "wrffr";
+ form = "'Pn.b";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPArithmeticWithImm_Predicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form00 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.0";
+ const char *form05 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.5";
+ const char *form10 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #1.0";
+ const char *form20 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #2.0";
+ int i1 = instr->ExtractBit(5);
+ const char *form = i1 ? form10 : form00;
+
+ switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) {
+ case FADD_z_p_zs:
+ mnemonic = "fadd";
+ form = i1 ? form10 : form05;
+ break;
+ case FMAXNM_z_p_zs:
+ mnemonic = "fmaxnm";
+ break;
+ case FMAX_z_p_zs:
+ mnemonic = "fmax";
+ break;
+ case FMINNM_z_p_zs:
+ mnemonic = "fminnm";
+ break;
+ case FMIN_z_p_zs:
+ mnemonic = "fmin";
+ break;
+ case FMUL_z_p_zs:
+ mnemonic = "fmul";
+ form = i1 ? form20 : form05;
+ break;
+ case FSUBR_z_p_zs:
+ mnemonic = "fsubr";
+ form = i1 ? form10 : form05;
+ break;
+ case FSUB_z_p_zs:
+ mnemonic = "fsub";
+ form = i1 ? form10 : form05;
+ break;
+ default:
+ form = "(SVEFPArithmeticWithImm_Predicated)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
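+
+// Illustrative only: bit 5 (i1) selects between the two immediates each
+// instruction accepts, so the forms above print, for example,
+//   fadd z0.s, p1/m, z0.s, #0.5    // i1 == 0
+//   fmul z2.d, p1/m, z2.d, #2.0    // i1 == 1
+//   fmax z4.h, p1/m, z4.h, #0.0    // i1 == 0
+// (register numbers are arbitrary examples).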
+
+void Disassembler::VisitSVEFPArithmetic_Predicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
+
+ switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) {
+ case FABD_z_p_zz:
+ mnemonic = "fabd";
+ break;
+ case FADD_z_p_zz:
+ mnemonic = "fadd";
+ break;
+ case FDIVR_z_p_zz:
+ mnemonic = "fdivr";
+ break;
+ case FDIV_z_p_zz:
+ mnemonic = "fdiv";
+ break;
+ case FMAXNM_z_p_zz:
+ mnemonic = "fmaxnm";
+ break;
+ case FMAX_z_p_zz:
+ mnemonic = "fmax";
+ break;
+ case FMINNM_z_p_zz:
+ mnemonic = "fminnm";
+ break;
+ case FMIN_z_p_zz:
+ mnemonic = "fmin";
+ break;
+ case FMULX_z_p_zz:
+ mnemonic = "fmulx";
+ break;
+ case FMUL_z_p_zz:
+ mnemonic = "fmul";
+ break;
+ case FSCALE_z_p_zz:
+ mnemonic = "fscale";
+ break;
+ case FSUBR_z_p_zz:
+ mnemonic = "fsubr";
+ break;
+ case FSUB_z_p_zz:
+ mnemonic = "fsub";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPConvertPrecision(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFPConvertPrecision)";
+
+ switch (instr->Mask(SVEFPConvertPrecisionMask)) {
+ case FCVT_z_p_z_d2h:
+ mnemonic = "fcvt";
+ form = "'Zd.h, 'Pgl/m, 'Zn.d";
+ break;
+ case FCVT_z_p_z_d2s:
+ mnemonic = "fcvt";
+ form = "'Zd.s, 'Pgl/m, 'Zn.d";
+ break;
+ case FCVT_z_p_z_h2d:
+ mnemonic = "fcvt";
+ form = "'Zd.d, 'Pgl/m, 'Zn.h";
+ break;
+ case FCVT_z_p_z_h2s:
+ mnemonic = "fcvt";
+ form = "'Zd.s, 'Pgl/m, 'Zn.h";
+ break;
+ case FCVT_z_p_z_s2d:
+ mnemonic = "fcvt";
+ form = "'Zd.d, 'Pgl/m, 'Zn.s";
+ break;
+ case FCVT_z_p_z_s2h:
+ mnemonic = "fcvt";
+ form = "'Zd.h, 'Pgl/m, 'Zn.s";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPConvertToInt(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFPConvertToInt)";
+
+ switch (instr->Mask(SVEFPConvertToIntMask)) {
+ case FCVTZS_z_p_z_d2w:
+ mnemonic = "fcvtzs";
+ form = "'Zd.s, 'Pgl/m, 'Zn.d";
+ break;
+ case FCVTZS_z_p_z_d2x:
+ mnemonic = "fcvtzs";
+ form = "'Zd.d, 'Pgl/m, 'Zn.d";
+ break;
+ case FCVTZS_z_p_z_fp162h:
+ mnemonic = "fcvtzs";
+ form = "'Zd.h, 'Pgl/m, 'Zn.h";
+ break;
+ case FCVTZS_z_p_z_fp162w:
+ mnemonic = "fcvtzs";
+ form = "'Zd.s, 'Pgl/m, 'Zn.h";
+ break;
+ case FCVTZS_z_p_z_fp162x:
+ mnemonic = "fcvtzs";
+ form = "'Zd.d, 'Pgl/m, 'Zn.h";
+ break;
+ case FCVTZS_z_p_z_s2w:
+ mnemonic = "fcvtzs";
+ form = "'Zd.s, 'Pgl/m, 'Zn.s";
+ break;
+ case FCVTZS_z_p_z_s2x:
+ mnemonic = "fcvtzs";
+ form = "'Zd.d, 'Pgl/m, 'Zn.s";
+ break;
+ case FCVTZU_z_p_z_d2w:
+ mnemonic = "fcvtzu";
+ form = "'Zd.s, 'Pgl/m, 'Zn.d";
+ break;
+ case FCVTZU_z_p_z_d2x:
+ mnemonic = "fcvtzu";
+ form = "'Zd.d, 'Pgl/m, 'Zn.d";
+ break;
+ case FCVTZU_z_p_z_fp162h:
+ mnemonic = "fcvtzu";
+ form = "'Zd.h, 'Pgl/m, 'Zn.h";
+ break;
+ case FCVTZU_z_p_z_fp162w:
+ mnemonic = "fcvtzu";
+ form = "'Zd.s, 'Pgl/m, 'Zn.h";
+ break;
+ case FCVTZU_z_p_z_fp162x:
+ mnemonic = "fcvtzu";
+ form = "'Zd.d, 'Pgl/m, 'Zn.h";
+ break;
+ case FCVTZU_z_p_z_s2w:
+ mnemonic = "fcvtzu";
+ form = "'Zd.s, 'Pgl/m, 'Zn.s";
+ break;
+ case FCVTZU_z_p_z_s2x:
+ mnemonic = "fcvtzu";
+ form = "'Zd.d, 'Pgl/m, 'Zn.s";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPExponentialAccelerator(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFPExponentialAccelerator)";
+
+ unsigned size = instr->GetSVESize();
+ switch (instr->Mask(SVEFPExponentialAcceleratorMask)) {
+ case FEXPA_z_z:
+ if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) ||
+ (size == kDRegSizeInBytesLog2)) {
+ mnemonic = "fexpa";
+ form = "'Zd.'t, 'Zn.'t";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPRoundToIntegralValue(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t";
+
+ switch (instr->Mask(SVEFPRoundToIntegralValueMask)) {
+ case FRINTA_z_p_z:
+ mnemonic = "frinta";
+ break;
+ case FRINTI_z_p_z:
+ mnemonic = "frinti";
+ break;
+ case FRINTM_z_p_z:
+ mnemonic = "frintm";
+ break;
+ case FRINTN_z_p_z:
+ mnemonic = "frintn";
+ break;
+ case FRINTP_z_p_z:
+ mnemonic = "frintp";
+ break;
+ case FRINTX_z_p_z:
+ mnemonic = "frintx";
+ break;
+ case FRINTZ_z_p_z:
+ mnemonic = "frintz";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPTrigMulAddCoefficient(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFPTrigMulAddCoefficient)";
+
+ unsigned size = instr->GetSVESize();
+ switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) {
+ case FTMAD_z_zzi:
+ if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) ||
+ (size == kDRegSizeInBytesLog2)) {
+ mnemonic = "ftmad";
+ form = "'Zd.'t, 'Zd.'t, 'Zn.'t, #'u1816";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPTrigSelectCoefficient(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFPTrigSelectCoefficient)";
+
+ unsigned size = instr->GetSVESize();
+ switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) {
+ case FTSSEL_z_zz:
+ if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) ||
+ (size == kDRegSizeInBytesLog2)) {
+ mnemonic = "ftssel";
+ form = "'Zd.'t, 'Zn.'t, 'Zm.'t";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPUnaryOp(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t";
+
+ if (instr->GetSVESize() == kBRegSizeInBytesLog2) {
+ form = "(SVEFPUnaryOp)";
+ } else {
+ switch (instr->Mask(SVEFPUnaryOpMask)) {
+ case FRECPX_z_p_z:
+ mnemonic = "frecpx";
+ break;
+ case FSQRT_z_p_z:
+ mnemonic = "fsqrt";
+ break;
+ default:
+ form = "(SVEFPUnaryOp)";
+ break;
+ }
+ }
+ Format(instr, mnemonic, form);
+}
+
+static const char *IncDecFormHelper(const Instruction *instr,
+ const char *reg_pat_mul_form,
+ const char *reg_pat_form,
+ const char *reg_form) {
+ if (instr->ExtractBits(19, 16) == 0) {
+ if (instr->ExtractBits(9, 5) == SVE_ALL) {
+ // Use the register only form if the multiplier is one (encoded as zero)
+ // and the pattern is SVE_ALL.
+ return reg_form;
+ }
+ // Use the register and pattern form if the multiplier is one.
+ return reg_pat_form;
+ }
+ return reg_pat_mul_form;
+}
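+
+// Illustrative only: for the inc/dec visitors that use the helper above, the
+// three forms correspond to output such as
+//   incd x0, pow2, mul #4    // imm4 == 3, pattern == POW2
+//   incd x0, pow2            // imm4 == 0, pattern == POW2
+//   incd x0                  // imm4 == 0, pattern == SVE_ALL
+// assuming 'Ipc expands to the predicate-constraint (pattern) name.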
+
+void Disassembler::VisitSVEIncDecRegisterByElementCount(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form =
+ IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd");
+
+ switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) {
+ case DECB_r_rs:
+ mnemonic = "decb";
+ break;
+ case DECD_r_rs:
+ mnemonic = "decd";
+ break;
+ case DECH_r_rs:
+ mnemonic = "dech";
+ break;
+ case DECW_r_rs:
+ mnemonic = "decw";
+ break;
+ case INCB_r_rs:
+ mnemonic = "incb";
+ break;
+ case INCD_r_rs:
+ mnemonic = "incd";
+ break;
+ case INCH_r_rs:
+ mnemonic = "inch";
+ break;
+ case INCW_r_rs:
+ mnemonic = "incw";
+ break;
+ default:
+ form = "(SVEIncDecRegisterByElementCount)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIncDecVectorByElementCount(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = IncDecFormHelper(instr,
+ "'Zd.'t, 'Ipc, mul #'u1916+1",
+ "'Zd.'t, 'Ipc",
+ "'Zd.'t");
+
+ switch (instr->Mask(SVEIncDecVectorByElementCountMask)) {
+ case DECD_z_zs:
+ mnemonic = "decd";
+ break;
+ case DECH_z_zs:
+ mnemonic = "dech";
+ break;
+ case DECW_z_zs:
+ mnemonic = "decw";
+ break;
+ case INCD_z_zs:
+ mnemonic = "incd";
+ break;
+ case INCH_z_zs:
+ mnemonic = "inch";
+ break;
+ case INCW_z_zs:
+ mnemonic = "incw";
+ break;
+ default:
+ form = "(SVEIncDecVectorByElementCount)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEInsertGeneralRegister(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEInsertGeneralRegister)";
+
+ switch (instr->Mask(SVEInsertGeneralRegisterMask)) {
+ case INSR_z_r:
+ mnemonic = "insr";
+ if (instr->GetSVESize() == kDRegSizeInBytesLog2) {
+ form = "'Zd.'t, 'Xn";
+ } else {
+ form = "'Zd.'t, 'Wn";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEInsertSIMDFPScalarRegister(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEInsertSIMDFPScalarRegister)";
+
+ switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) {
+ case INSR_z_v:
+ mnemonic = "insr";
+ form = "'Zd.'t, 'Vnv";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntAddSubtractImm_Unpredicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = (instr->ExtractBit(13) == 0)
+ ? "'Zd.'t, 'Zd.'t, #'u1205"
+ : "'Zd.'t, 'Zd.'t, #'u1205, lsl #8";
+
+ switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) {
+ case ADD_z_zi:
+ mnemonic = "add";
+ break;
+ case SQADD_z_zi:
+ mnemonic = "sqadd";
+ break;
+ case SQSUB_z_zi:
+ mnemonic = "sqsub";
+ break;
+ case SUBR_z_zi:
+ mnemonic = "subr";
+ break;
+ case SUB_z_zi:
+ mnemonic = "sub";
+ break;
+ case UQADD_z_zi:
+ mnemonic = "uqadd";
+ break;
+ case UQSUB_z_zi:
+ mnemonic = "uqsub";
+ break;
+ default:
+ form = "(SVEIntAddSubtractImm_Unpredicated)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntAddSubtractVectors_Predicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
+
+ switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) {
+ case ADD_z_p_zz:
+ mnemonic = "add";
+ break;
+ case SUBR_z_p_zz:
+ mnemonic = "subr";
+ break;
+ case SUB_z_p_zz:
+ mnemonic = "sub";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntCompareScalarCountAndLimit(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form =
+ (instr->ExtractBit(12) == 0) ? "'Pd.'t, 'Wn, 'Wm" : "'Pd.'t, 'Xn, 'Xm";
+
+ switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) {
+ case WHILELE_p_p_rr:
+ mnemonic = "whilele";
+ break;
+ case WHILELO_p_p_rr:
+ mnemonic = "whilelo";
+ break;
+ case WHILELS_p_p_rr:
+ mnemonic = "whilels";
+ break;
+ case WHILELT_p_p_rr:
+ mnemonic = "whilelt";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntConvertToFP(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEIntConvertToFP)";
+
+ switch (instr->Mask(SVEIntConvertToFPMask)) {
+ case SCVTF_z_p_z_h2fp16:
+ mnemonic = "scvtf";
+ form = "'Zd.h, 'Pgl/m, 'Zn.h";
+ break;
+ case SCVTF_z_p_z_w2d:
+ mnemonic = "scvtf";
+ form = "'Zd.d, 'Pgl/m, 'Zn.s";
+ break;
+ case SCVTF_z_p_z_w2fp16:
+ mnemonic = "scvtf";
+ form = "'Zd.h, 'Pgl/m, 'Zn.s";
+ break;
+ case SCVTF_z_p_z_w2s:
+ mnemonic = "scvtf";
+ form = "'Zd.s, 'Pgl/m, 'Zn.s";
+ break;
+ case SCVTF_z_p_z_x2d:
+ mnemonic = "scvtf";
+ form = "'Zd.d, 'Pgl/m, 'Zn.d";
+ break;
+ case SCVTF_z_p_z_x2fp16:
+ mnemonic = "scvtf";
+ form = "'Zd.h, 'Pgl/m, 'Zn.d";
+ break;
+ case SCVTF_z_p_z_x2s:
+ mnemonic = "scvtf";
+ form = "'Zd.s, 'Pgl/m, 'Zn.d";
+ break;
+ case UCVTF_z_p_z_h2fp16:
+ mnemonic = "ucvtf";
+ form = "'Zd.h, 'Pgl/m, 'Zn.h";
+ break;
+ case UCVTF_z_p_z_w2d:
+ mnemonic = "ucvtf";
+ form = "'Zd.d, 'Pgl/m, 'Zn.s";
+ break;
+ case UCVTF_z_p_z_w2fp16:
+ mnemonic = "ucvtf";
+ form = "'Zd.h, 'Pgl/m, 'Zn.s";
+ break;
+ case UCVTF_z_p_z_w2s:
+ mnemonic = "ucvtf";
+ form = "'Zd.s, 'Pgl/m, 'Zn.s";
+ break;
+ case UCVTF_z_p_z_x2d:
+ mnemonic = "ucvtf";
+ form = "'Zd.d, 'Pgl/m, 'Zn.d";
+ break;
+ case UCVTF_z_p_z_x2fp16:
+ mnemonic = "ucvtf";
+ form = "'Zd.h, 'Pgl/m, 'Zn.d";
+ break;
+ case UCVTF_z_p_z_x2s:
+ mnemonic = "ucvtf";
+ form = "'Zd.s, 'Pgl/m, 'Zn.d";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntDivideVectors_Predicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
+
+ switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) {
+ case SDIVR_z_p_zz:
+ mnemonic = "sdivr";
+ break;
+ case SDIV_z_p_zz:
+ mnemonic = "sdiv";
+ break;
+ case UDIVR_z_p_zz:
+ mnemonic = "udivr";
+ break;
+ case UDIV_z_p_zz:
+ mnemonic = "udiv";
+ break;
+ default:
+ break;
+ }
+
+ switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) {
+ case SDIVR_z_p_zz:
+ case SDIV_z_p_zz:
+ case UDIVR_z_p_zz:
+ case UDIV_z_p_zz:
+ switch (instr->GetSVESize()) {
+ case kBRegSizeInBytesLog2:
+ case kHRegSizeInBytesLog2:
+ mnemonic = "unimplemented";
+ form = "(SVEIntBinaryArithmeticPredicated)";
+ break;
+ case kSRegSizeInBytesLog2:
+ case kDRegSizeInBytesLog2:
+ // The default form works for these instructions.
+ break;
+ default:
+ // GetSVESize() should never return other values.
+ VIXL_UNREACHABLE();
+ break;
+ }
+ }
+
+ Format(instr, mnemonic, form);
+}
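+
+// Illustrative only: sdiv/udiv (and their reversed forms) are only defined
+// for .s and .d lanes, so a valid encoding prints, for example,
+//   sdiv z0.s, p2/m, z0.s, z1.s
+// while a B- or H-sized encoding falls back to the placeholder text above.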
+
+void Disassembler::VisitSVEIntMinMaxDifference_Predicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
+
+ switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) {
+ case SABD_z_p_zz:
+ mnemonic = "sabd";
+ break;
+ case SMAX_z_p_zz:
+ mnemonic = "smax";
+ break;
+ case SMIN_z_p_zz:
+ mnemonic = "smin";
+ break;
+ case UABD_z_p_zz:
+ mnemonic = "uabd";
+ break;
+ case UMAX_z_p_zz:
+ mnemonic = "umax";
+ break;
+ case UMIN_z_p_zz:
+ mnemonic = "umin";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntMinMaxImm_Unpredicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Zd.'t, #'u1205";
+
+ switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) {
+ case SMAX_z_zi:
+ mnemonic = "smax";
+ form = "'Zd.'t, 'Zd.'t, #'s1205";
+ break;
+ case SMIN_z_zi:
+ mnemonic = "smin";
+ form = "'Zd.'t, 'Zd.'t, #'s1205";
+ break;
+ case UMAX_z_zi:
+ mnemonic = "umax";
+ break;
+ case UMIN_z_zi:
+ mnemonic = "umin";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntMulImm_Unpredicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEIntMulImm_Unpredicated)";
+
+ switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) {
+ case MUL_z_zi:
+ mnemonic = "mul";
+ form = "'Zd.'t, 'Zd.'t, #'s1205";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntMulVectors_Predicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
+
+ switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) {
+ case MUL_z_p_zz:
+ mnemonic = "mul";
+ break;
+ case SMULH_z_p_zz:
+ mnemonic = "smulh";
+ break;
+ case UMULH_z_p_zz:
+ mnemonic = "umulh";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVELoadAndBroadcastElement(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVELoadAndBroadcastElement)";
+ const char *suffix_b = ", #'u2116]";
+ const char *suffix_h = ", #'u2116*2]";
+ const char *suffix_w = ", #'u2116*4]";
+ const char *suffix_d = ", #'u2116*8]";
+ const char *suffix = NULL;
+
+ switch (instr->Mask(SVELoadAndBroadcastElementMask)) {
+ case LD1RB_z_p_bi_u16:
+ mnemonic = "ld1rb";
+ form = "{'Zt.h}, 'Pgl/z, ['Xns";
+ suffix = suffix_b;
+ break;
+ case LD1RB_z_p_bi_u32:
+ mnemonic = "ld1rb";
+ form = "{'Zt.s}, 'Pgl/z, ['Xns";
+ suffix = suffix_b;
+ break;
+ case LD1RB_z_p_bi_u64:
+ mnemonic = "ld1rb";
+ form = "{'Zt.d}, 'Pgl/z, ['Xns";
+ suffix = suffix_b;
+ break;
+ case LD1RB_z_p_bi_u8:
+ mnemonic = "ld1rb";
+ form = "{'Zt.b}, 'Pgl/z, ['Xns";
+ suffix = suffix_b;
+ break;
+ case LD1RD_z_p_bi_u64:
+ mnemonic = "ld1rd";
+ form = "{'Zt.d}, 'Pgl/z, ['Xns";
+ suffix = suffix_d;
+ break;
+ case LD1RH_z_p_bi_u16:
+ mnemonic = "ld1rh";
+ form = "{'Zt.h}, 'Pgl/z, ['Xns";
+ suffix = suffix_h;
+ break;
+ case LD1RH_z_p_bi_u32:
+ mnemonic = "ld1rh";
+ form = "{'Zt.s}, 'Pgl/z, ['Xns";
+ suffix = suffix_h;
+ break;
+ case LD1RH_z_p_bi_u64:
+ mnemonic = "ld1rh";
+ form = "{'Zt.d}, 'Pgl/z, ['Xns";
+ suffix = suffix_h;
+ break;
+ case LD1RSB_z_p_bi_s16:
+ mnemonic = "ld1rsb";
+ form = "{'Zt.h}, 'Pgl/z, ['Xns";
+ suffix = suffix_b;
+ break;
+ case LD1RSB_z_p_bi_s32:
+ mnemonic = "ld1rsb";
+ form = "{'Zt.s}, 'Pgl/z, ['Xns";
+ suffix = suffix_b;
+ break;
+ case LD1RSB_z_p_bi_s64:
+ mnemonic = "ld1rsb";
+ form = "{'Zt.d}, 'Pgl/z, ['Xns";
+ suffix = suffix_b;
+ break;
+ case LD1RSH_z_p_bi_s32:
+ mnemonic = "ld1rsh";
+ form = "{'Zt.s}, 'Pgl/z, ['Xns";
+ suffix = suffix_h;
+ break;
+ case LD1RSH_z_p_bi_s64:
+ mnemonic = "ld1rsh";
+ form = "{'Zt.d}, 'Pgl/z, ['Xns";
+ suffix = suffix_h;
+ break;
+ case LD1RSW_z_p_bi_s64:
+ mnemonic = "ld1rsw";
+ form = "{'Zt.d}, 'Pgl/z, ['Xns";
+ suffix = suffix_w;
+ break;
+ case LD1RW_z_p_bi_u32:
+ mnemonic = "ld1rw";
+ form = "{'Zt.s}, 'Pgl/z, ['Xns";
+ suffix = suffix_w;
+ break;
+ case LD1RW_z_p_bi_u64:
+ mnemonic = "ld1rw";
+ form = "{'Zt.d}, 'Pgl/z, ['Xns";
+ suffix = suffix_w;
+ break;
+ default:
+ break;
+ }
+
+  // Omit the immediate operand when it is zero.
+ if (instr->ExtractBits(21, 16) == 0) {
+ suffix = "]";
+ }
+
+ Format(instr, mnemonic, form, suffix);
+}
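+
+// Illustrative only: each suffix scales the unsigned 6-bit immediate by the
+// memory access size, and the immediate is dropped when it is zero, e.g.
+//   ld1rh {z0.s}, p1/z, [x2, #6]    // imm6 == 3, scaled by 2 bytes
+//   ld1rw {z5.d}, p1/z, [x2]        // imm6 == 0
+// (register numbers are arbitrary examples).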
+
+void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusImm)";
+
+ const char *suffix =
+ (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916*16]";
+
+ switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) {
+ case LD1RQB_z_p_bi_u8:
+ mnemonic = "ld1rqb";
+ form = "{'Zt.b}, 'Pgl/z, ['Xns";
+ break;
+ case LD1RQD_z_p_bi_u64:
+ mnemonic = "ld1rqd";
+ form = "{'Zt.d}, 'Pgl/z, ['Xns";
+ break;
+ case LD1RQH_z_p_bi_u16:
+ mnemonic = "ld1rqh";
+ form = "{'Zt.h}, 'Pgl/z, ['Xns";
+ break;
+ case LD1RQW_z_p_bi_u32:
+ mnemonic = "ld1rqw";
+ form = "{'Zt.s}, 'Pgl/z, ['Xns";
+ break;
+ default:
+ suffix = NULL;
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusScalar)";
+
+ switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) {
+ case LD1RQB_z_p_br_contiguous:
+ mnemonic = "ld1rqb";
+ form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]";
+ break;
+ case LD1RQD_z_p_br_contiguous:
+ mnemonic = "ld1rqd";
+ form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]";
+ break;
+ case LD1RQH_z_p_br_contiguous:
+ mnemonic = "ld1rqh";
+ form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]";
+ break;
+ case LD1RQW_z_p_br_contiguous:
+ mnemonic = "ld1rqw";
+ form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVELoadMultipleStructures_ScalarPlusImm)";
+
+ const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]";
+ const char *form_3 =
+ "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]";
+ const char *form_4 =
+ "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, "
+ "'Pgl/z, ['Xns'ISveSvl]";
+
+ switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) {
+ case LD2B_z_p_bi_contiguous:
+ mnemonic = "ld2b";
+ form = form_2;
+ break;
+ case LD2D_z_p_bi_contiguous:
+ mnemonic = "ld2d";
+ form = form_2;
+ break;
+ case LD2H_z_p_bi_contiguous:
+ mnemonic = "ld2h";
+ form = form_2;
+ break;
+ case LD2W_z_p_bi_contiguous:
+ mnemonic = "ld2w";
+ form = form_2;
+ break;
+ case LD3B_z_p_bi_contiguous:
+ mnemonic = "ld3b";
+ form = form_3;
+ break;
+ case LD3D_z_p_bi_contiguous:
+ mnemonic = "ld3d";
+ form = form_3;
+ break;
+ case LD3H_z_p_bi_contiguous:
+ mnemonic = "ld3h";
+ form = form_3;
+ break;
+ case LD3W_z_p_bi_contiguous:
+ mnemonic = "ld3w";
+ form = form_3;
+ break;
+ case LD4B_z_p_bi_contiguous:
+ mnemonic = "ld4b";
+ form = form_4;
+ break;
+ case LD4D_z_p_bi_contiguous:
+ mnemonic = "ld4d";
+ form = form_4;
+ break;
+ case LD4H_z_p_bi_contiguous:
+ mnemonic = "ld4h";
+ form = form_4;
+ break;
+ case LD4W_z_p_bi_contiguous:
+ mnemonic = "ld4w";
+ form = form_4;
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusScalar(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVELoadMultipleStructures_ScalarPlusScalar)";
+
+ const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]";
+ const char *form_3 =
+ "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]";
+ const char *form_4 =
+ "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, "
+ "'Pgl/z, ['Xns, 'Xm'NSveS]";
+
+ switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) {
+ case LD2B_z_p_br_contiguous:
+ mnemonic = "ld2b";
+ form = form_2;
+ break;
+ case LD2D_z_p_br_contiguous:
+ mnemonic = "ld2d";
+ form = form_2;
+ break;
+ case LD2H_z_p_br_contiguous:
+ mnemonic = "ld2h";
+ form = form_2;
+ break;
+ case LD2W_z_p_br_contiguous:
+ mnemonic = "ld2w";
+ form = form_2;
+ break;
+ case LD3B_z_p_br_contiguous:
+ mnemonic = "ld3b";
+ form = form_3;
+ break;
+ case LD3D_z_p_br_contiguous:
+ mnemonic = "ld3d";
+ form = form_3;
+ break;
+ case LD3H_z_p_br_contiguous:
+ mnemonic = "ld3h";
+ form = form_3;
+ break;
+ case LD3W_z_p_br_contiguous:
+ mnemonic = "ld3w";
+ form = form_3;
+ break;
+ case LD4B_z_p_br_contiguous:
+ mnemonic = "ld4b";
+ form = form_4;
+ break;
+ case LD4D_z_p_br_contiguous:
+ mnemonic = "ld4d";
+ form = form_4;
+ break;
+ case LD4H_z_p_br_contiguous:
+ mnemonic = "ld4h";
+ form = form_4;
+ break;
+ case LD4W_z_p_br_contiguous:
+ mnemonic = "ld4w";
+ form = form_4;
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVELoadPredicateRegister(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVELoadPredicateRegister)";
+
+ switch (instr->Mask(SVELoadPredicateRegisterMask)) {
+ case LDR_p_bi:
+ mnemonic = "ldr";
+ if (instr->Mask(0x003f1c00) == 0) {
+ form = "'Pd, ['Xns]";
+ } else {
+ form = "'Pd, ['Xns, #'s2116:1210, mul vl]";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
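+
+// Illustrative only: the 0x003f1c00 test covers the split 9-bit immediate
+// (bits 21:16 and 12:10); when it is zero the offset is omitted, e.g.
+//   ldr p0, [x1]
+//   ldr p0, [x1, #-42, mul vl]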
+
+void Disassembler::VisitSVELoadVectorRegister(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVELoadVectorRegister)";
+
+ switch (instr->Mask(SVELoadVectorRegisterMask)) {
+ case LDR_z_bi:
+ mnemonic = "ldr";
+ if (instr->Mask(0x003f1c00) == 0) {
+ form = "'Zd, ['Xns]";
+ } else {
+ form = "'Zt, ['Xns, #'s2116:1210, mul vl]";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPartitionBreakCondition(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Pd.b, p'u1310/'?04:mz, 'Pn.b";
+
+ switch (instr->Mask(SVEPartitionBreakConditionMask)) {
+ case BRKAS_p_p_p_z:
+ mnemonic = "brkas";
+ break;
+ case BRKA_p_p_p:
+ mnemonic = "brka";
+ break;
+ case BRKBS_p_p_p_z:
+ mnemonic = "brkbs";
+ break;
+ case BRKB_p_p_p:
+ mnemonic = "brkb";
+ break;
+ default:
+ form = "(SVEPartitionBreakCondition)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPermutePredicateElements(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Pd.'t, 'Pn.'t, 'Pm.'t";
+
+ switch (instr->Mask(SVEPermutePredicateElementsMask)) {
+ case TRN1_p_pp:
+ mnemonic = "trn1";
+ break;
+ case TRN2_p_pp:
+ mnemonic = "trn2";
+ break;
+ case UZP1_p_pp:
+ mnemonic = "uzp1";
+ break;
+ case UZP2_p_pp:
+ mnemonic = "uzp2";
+ break;
+ case ZIP1_p_pp:
+ mnemonic = "zip1";
+ break;
+ case ZIP2_p_pp:
+ mnemonic = "zip2";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPredicateFirstActive(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEPredicateFirstActive)";
+
+ switch (instr->Mask(SVEPredicateFirstActiveMask)) {
+ case PFIRST_p_p_p:
+ mnemonic = "pfirst";
+ form = "'Pd.b, 'Pn, 'Pd.b";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPredicateReadFromFFR_Unpredicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEPredicateReadFromFFR_Unpredicated)";
+
+ switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) {
+ case RDFFR_p_f:
+ mnemonic = "rdffr";
+ form = "'Pd.b";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPredicateTest(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEPredicateTest)";
+
+ switch (instr->Mask(SVEPredicateTestMask)) {
+ case PTEST_p_p:
+ mnemonic = "ptest";
+ form = "p'u1310, 'Pn.b";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPredicateZero(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEPredicateZero)";
+
+ switch (instr->Mask(SVEPredicateZeroMask)) {
+ case PFALSE_p:
+ mnemonic = "pfalse";
+ form = "'Pd.b";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPropagateBreakToNextPartition(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pd.b";
+
+ switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) {
+ case BRKNS_p_p_pp:
+ mnemonic = "brkns";
+ break;
+ case BRKN_p_p_pp:
+ mnemonic = "brkn";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEReversePredicateElements(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEReversePredicateElements)";
+
+ switch (instr->Mask(SVEReversePredicateElementsMask)) {
+ case REV_p_p:
+ mnemonic = "rev";
+ form = "'Pd.'t, 'Pn.'t";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEReverseVectorElements(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEReverseVectorElements)";
+
+ switch (instr->Mask(SVEReverseVectorElementsMask)) {
+ case REV_z_z:
+ mnemonic = "rev";
+ form = "'Zd.'t, 'Zn.'t";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEReverseWithinElements(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t";
+
+ unsigned size = instr->GetSVESize();
+ switch (instr->Mask(SVEReverseWithinElementsMask)) {
+ case RBIT_z_p_z:
+ mnemonic = "rbit";
+ break;
+ case REVB_z_z:
+ if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) ||
+ (size == kDRegSizeInBytesLog2)) {
+ mnemonic = "revb";
+ } else {
+ form = "(SVEReverseWithinElements)";
+ }
+ break;
+ case REVH_z_z:
+ if ((size == kSRegSizeInBytesLog2) || (size == kDRegSizeInBytesLog2)) {
+ mnemonic = "revh";
+ } else {
+ form = "(SVEReverseWithinElements)";
+ }
+ break;
+ case REVW_z_z:
+ if (size == kDRegSizeInBytesLog2) {
+ mnemonic = "revw";
+ } else {
+ form = "(SVEReverseWithinElements)";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVESaturatingIncDecRegisterByElementCount(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = IncDecFormHelper(instr,
+ "'R20d, 'Ipc, mul #'u1916+1",
+ "'R20d, 'Ipc",
+ "'R20d");
+ const char *form_sx = IncDecFormHelper(instr,
+ "'Xd, 'Wd, 'Ipc, mul #'u1916+1",
+ "'Xd, 'Wd, 'Ipc",
+ "'Xd, 'Wd");
+
+ switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) {
+ case SQDECB_r_rs_sx:
+ mnemonic = "sqdecb";
+ form = form_sx;
+ break;
+ case SQDECD_r_rs_sx:
+ mnemonic = "sqdecd";
+ form = form_sx;
+ break;
+ case SQDECH_r_rs_sx:
+ mnemonic = "sqdech";
+ form = form_sx;
+ break;
+ case SQDECW_r_rs_sx:
+ mnemonic = "sqdecw";
+ form = form_sx;
+ break;
+ case SQINCB_r_rs_sx:
+ mnemonic = "sqincb";
+ form = form_sx;
+ break;
+ case SQINCD_r_rs_sx:
+ mnemonic = "sqincd";
+ form = form_sx;
+ break;
+ case SQINCH_r_rs_sx:
+ mnemonic = "sqinch";
+ form = form_sx;
+ break;
+ case SQINCW_r_rs_sx:
+ mnemonic = "sqincw";
+ form = form_sx;
+ break;
+ case SQDECB_r_rs_x:
+ mnemonic = "sqdecb";
+ break;
+ case SQDECD_r_rs_x:
+ mnemonic = "sqdecd";
+ break;
+ case SQDECH_r_rs_x:
+ mnemonic = "sqdech";
+ break;
+ case SQDECW_r_rs_x:
+ mnemonic = "sqdecw";
+ break;
+ case SQINCB_r_rs_x:
+ mnemonic = "sqincb";
+ break;
+ case SQINCD_r_rs_x:
+ mnemonic = "sqincd";
+ break;
+ case SQINCH_r_rs_x:
+ mnemonic = "sqinch";
+ break;
+ case SQINCW_r_rs_x:
+ mnemonic = "sqincw";
+ break;
+ case UQDECB_r_rs_uw:
+ case UQDECB_r_rs_x:
+ mnemonic = "uqdecb";
+ break;
+ case UQDECD_r_rs_uw:
+ case UQDECD_r_rs_x:
+ mnemonic = "uqdecd";
+ break;
+ case UQDECH_r_rs_uw:
+ case UQDECH_r_rs_x:
+ mnemonic = "uqdech";
+ break;
+ case UQDECW_r_rs_uw:
+ case UQDECW_r_rs_x:
+ mnemonic = "uqdecw";
+ break;
+ case UQINCB_r_rs_uw:
+ case UQINCB_r_rs_x:
+ mnemonic = "uqincb";
+ break;
+ case UQINCD_r_rs_uw:
+ case UQINCD_r_rs_x:
+ mnemonic = "uqincd";
+ break;
+ case UQINCH_r_rs_uw:
+ case UQINCH_r_rs_x:
+ mnemonic = "uqinch";
+ break;
+ case UQINCW_r_rs_uw:
+ case UQINCW_r_rs_x:
+ mnemonic = "uqincw";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
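+
+// Illustrative only: the _sx (32-bit signed) variants name both the X and W
+// views of the register, while the other variants take a single register,
+// e.g.
+//   sqincw x0, w0, all, mul #2    // SQINCW_r_rs_sx
+//   sqincw x0, pow2               // SQINCW_r_rs_x
+//   uqincw w0, all, mul #3        // UQINCW_r_rs_uw
+// (patterns and multipliers are arbitrary examples).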
+
+void Disassembler::VisitSVESaturatingIncDecVectorByElementCount(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = IncDecFormHelper(instr,
+ "'Zd.'t, 'Ipc, mul #'u1916+1",
+ "'Zd.'t, 'Ipc",
+ "'Zd.'t");
+
+ switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) {
+ case SQDECD_z_zs:
+ mnemonic = "sqdecd";
+ break;
+ case SQDECH_z_zs:
+ mnemonic = "sqdech";
+ break;
+ case SQDECW_z_zs:
+ mnemonic = "sqdecw";
+ break;
+ case SQINCD_z_zs:
+ mnemonic = "sqincd";
+ break;
+ case SQINCH_z_zs:
+ mnemonic = "sqinch";
+ break;
+ case SQINCW_z_zs:
+ mnemonic = "sqincw";
+ break;
+ case UQDECD_z_zs:
+ mnemonic = "uqdecd";
+ break;
+ case UQDECH_z_zs:
+ mnemonic = "uqdech";
+ break;
+ case UQDECW_z_zs:
+ mnemonic = "uqdecw";
+ break;
+ case UQINCD_z_zs:
+ mnemonic = "uqincd";
+ break;
+ case UQINCH_z_zs:
+ mnemonic = "uqinch";
+ break;
+ case UQINCW_z_zs:
+ mnemonic = "uqincw";
+ break;
+ default:
+ form = "(SVEElementCount)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEStoreMultipleStructures_ScalarPlusImm)";
+
+ const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns'ISveSvl]";
+ const char *form_3 =
+ "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, ['Xns'ISveSvl]";
+ const char *form_4 =
+ "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, "
+ "'Pgl, ['Xns'ISveSvl]";
+
+ switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) {
+ case ST2B_z_p_bi_contiguous:
+ mnemonic = "st2b";
+ form = form_2;
+ break;
+ case ST2H_z_p_bi_contiguous:
+ mnemonic = "st2h";
+ form = form_2;
+ break;
+ case ST2W_z_p_bi_contiguous:
+ mnemonic = "st2w";
+ form = form_2;
+ break;
+ case ST2D_z_p_bi_contiguous:
+ mnemonic = "st2d";
+ form = form_2;
+ break;
+ case ST3B_z_p_bi_contiguous:
+ mnemonic = "st3b";
+ form = form_3;
+ break;
+ case ST3H_z_p_bi_contiguous:
+ mnemonic = "st3h";
+ form = form_3;
+ break;
+ case ST3W_z_p_bi_contiguous:
+ mnemonic = "st3w";
+ form = form_3;
+ break;
+ case ST3D_z_p_bi_contiguous:
+ mnemonic = "st3d";
+ form = form_3;
+ break;
+ case ST4B_z_p_bi_contiguous:
+ mnemonic = "st4b";
+ form = form_4;
+ break;
+ case ST4H_z_p_bi_contiguous:
+ mnemonic = "st4h";
+ form = form_4;
+ break;
+ case ST4W_z_p_bi_contiguous:
+ mnemonic = "st4w";
+ form = form_4;
+ break;
+ case ST4D_z_p_bi_contiguous:
+ mnemonic = "st4d";
+ form = form_4;
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusScalar(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEStoreMultipleStructures_ScalarPlusScalar)";
+
+ const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns, 'Xm'NSveS]";
+ const char *form_3 =
+ "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, ['Xns, 'Xm'NSveS]";
+ const char *form_4 =
+ "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, "
+ "'Pgl, ['Xns, 'Xm'NSveS]";
+
+ switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) {
+ case ST2B_z_p_br_contiguous:
+ mnemonic = "st2b";
+ form = form_2;
+ break;
+ case ST2D_z_p_br_contiguous:
+ mnemonic = "st2d";
+ form = form_2;
+ break;
+ case ST2H_z_p_br_contiguous:
+ mnemonic = "st2h";
+ form = form_2;
+ break;
+ case ST2W_z_p_br_contiguous:
+ mnemonic = "st2w";
+ form = form_2;
+ break;
+ case ST3B_z_p_br_contiguous:
+ mnemonic = "st3b";
+ form = form_3;
+ break;
+ case ST3D_z_p_br_contiguous:
+ mnemonic = "st3d";
+ form = form_3;
+ break;
+ case ST3H_z_p_br_contiguous:
+ mnemonic = "st3h";
+ form = form_3;
+ break;
+ case ST3W_z_p_br_contiguous:
+ mnemonic = "st3w";
+ form = form_3;
+ break;
+ case ST4B_z_p_br_contiguous:
+ mnemonic = "st4b";
+ form = form_4;
+ break;
+ case ST4D_z_p_br_contiguous:
+ mnemonic = "st4d";
+ form = form_4;
+ break;
+ case ST4H_z_p_br_contiguous:
+ mnemonic = "st4h";
+ form = form_4;
+ break;
+ case ST4W_z_p_br_contiguous:
+ mnemonic = "st4w";
+ form = form_4;
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEStorePredicateRegister(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEStorePredicateRegister)";
+
+ switch (instr->Mask(SVEStorePredicateRegisterMask)) {
+ case STR_p_bi:
+ mnemonic = "str";
+ if (instr->Mask(0x003f1c00) == 0) {
+ form = "'Pd, ['Xns]";
+ } else {
+ form = "'Pd, ['Xns, #'s2116:1210, mul vl]";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEStoreVectorRegister(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEStoreVectorRegister)";
+
+ switch (instr->Mask(SVEStoreVectorRegisterMask)) {
+ case STR_z_bi:
+ mnemonic = "str";
+ if (instr->Mask(0x003f1c00) == 0) {
+ form = "'Zd, ['Xns]";
+ } else {
+ form = "'Zt, ['Xns, #'s2116:1210, mul vl]";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVETableLookup(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVETableLookup)";
+
+ switch (instr->Mask(SVETableLookupMask)) {
+ case TBL_z_zz_1:
+ mnemonic = "tbl";
+ form = "'Zd.'t, {'Zn.'t}, 'Zm.'t";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEUnpackPredicateElements(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Pd.h, 'Pn.b";
+
+ switch (instr->Mask(SVEUnpackPredicateElementsMask)) {
+ case PUNPKHI_p_p:
+ mnemonic = "punpkhi";
+ break;
+ case PUNPKLO_p_p:
+ mnemonic = "punpklo";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEUnpackVectorElements(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Zn.'th";
+
+ if (instr->GetSVESize() == 0) {
+    // The destination lane size must be at least H-sized; a B-sized encoding
+    // is unallocated.
+ Format(instr, "unallocated", "(SVEUnpackVectorElements)");
+ return;
+ }
+
+ switch (instr->Mask(SVEUnpackVectorElementsMask)) {
+ case SUNPKHI_z_z:
+ mnemonic = "sunpkhi";
+ break;
+ case SUNPKLO_z_z:
+ mnemonic = "sunpklo";
+ break;
+ case UUNPKHI_z_z:
+ mnemonic = "uunpkhi";
+ break;
+ case UUNPKLO_z_z:
+ mnemonic = "uunpklo";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEVectorSplice_Destructive(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEVectorSplice_Destructive)";
+
+ switch (instr->Mask(SVEVectorSplice_DestructiveMask)) {
+ case SPLICE_z_p_zz_des:
+ mnemonic = "splice";
+ form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEAddressGeneration(const Instruction *instr) {
+ const char *mnemonic = "adr";
+ const char *form = "'Zd.d, ['Zn.d, 'Zm.d";
+ const char *suffix = NULL;
+
+ bool msz_is_zero = (instr->ExtractBits(11, 10) == 0);
+
+ switch (instr->Mask(SVEAddressGenerationMask)) {
+ case ADR_z_az_d_s32_scaled:
+ suffix = msz_is_zero ? ", sxtw]" : ", sxtw #'u1110]";
+ break;
+ case ADR_z_az_d_u32_scaled:
+ suffix = msz_is_zero ? ", uxtw]" : ", uxtw #'u1110]";
+ break;
+ case ADR_z_az_s_same_scaled:
+ case ADR_z_az_d_same_scaled:
+ form = "'Zd.'t, ['Zn.'t, 'Zm.'t";
+ suffix = msz_is_zero ? "]" : ", lsl #'u1110]";
+ break;
+ default:
+ mnemonic = "unimplemented";
+ form = "(SVEAddressGeneration)";
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
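+
+// Illustrative only: the msz field (bits 11:10) supplies the optional shift,
+// so the forms above print, for example,
+//   adr z0.d, [z1.d, z2.d, sxtw #2]    // ADR_z_az_d_s32_scaled, msz == 2
+//   adr z0.d, [z1.d, z2.d, uxtw]       // ADR_z_az_d_u32_scaled, msz == 0
+//   adr z0.s, [z1.s, z2.s, lsl #3]     // same-size variant, msz == 3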
+
+void Disassembler::VisitSVEBitwiseLogicalUnpredicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.d, 'Zn.d, 'Zm.d";
+
+ switch (instr->Mask(SVEBitwiseLogicalUnpredicatedMask)) {
+ case AND_z_zz:
+ mnemonic = "and";
+ break;
+ case BIC_z_zz:
+ mnemonic = "bic";
+ break;
+ case EOR_z_zz:
+ mnemonic = "eor";
+ break;
+ case ORR_z_zz:
+ mnemonic = "orr";
+ if (instr->GetRn() == instr->GetRm()) {
+ mnemonic = "mov";
+ form = "'Zd.d, 'Zn.d";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
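+
+// Illustrative only: orr with identical source registers is printed using its
+// preferred "mov" alias, e.g.
+//   orr z0.d, z1.d, z2.d
+//   mov z0.d, z1.d          // encoded as orr z0.d, z1.d, z1.d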
+
+void Disassembler::VisitSVEBitwiseShiftUnpredicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEBitwiseShiftUnpredicated)";
+ unsigned tsize =
+ (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(20, 19);
+ unsigned lane_size = instr->GetSVESize();
+
+ switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
+ case ASR_z_zi:
+ if (tsize != 0) {
+ // The tsz field must not be zero.
+ mnemonic = "asr";
+ form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves";
+ }
+ break;
+ case ASR_z_zw:
+ if (lane_size <= kSRegSizeInBytesLog2) {
+ mnemonic = "asr";
+ form = "'Zd.'t, 'Zn.'t, 'Zm.d";
+ }
+ break;
+ case LSL_z_zi:
+ if (tsize != 0) {
+ // The tsz field must not be zero.
+ mnemonic = "lsl";
+ form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSver";
+ }
+ break;
+ case LSL_z_zw:
+ if (lane_size <= kSRegSizeInBytesLog2) {
+ mnemonic = "lsl";
+ form = "'Zd.'t, 'Zn.'t, 'Zm.d";
+ }
+ break;
+ case LSR_z_zi:
+ if (tsize != 0) {
+ // The tsz field must not be zero.
+ mnemonic = "lsr";
+ form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves";
+ }
+ break;
+ case LSR_z_zw:
+ if (lane_size <= kSRegSizeInBytesLog2) {
+ mnemonic = "lsr";
+ form = "'Zd.'t, 'Zn.'t, 'Zm.d";
+ }
+ break;
+ default:
+ break;
+ }
+
+ Format(instr, mnemonic, form);
+}
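+
+// Illustrative only: for the immediate forms the combined tsz field encodes
+// both the lane size and the shift amount, and the wide forms (with a .d
+// shift operand) are only valid for lane sizes up to .s, e.g.
+//   lsl z0.h, z1.h, #3
+//   asr z0.s, z1.s, z2.d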
+
+void Disassembler::VisitSVEElementCount(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form =
+ IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd");
+
+ switch (instr->Mask(SVEElementCountMask)) {
+ case CNTB_r_s:
+ mnemonic = "cntb";
+ break;
+ case CNTD_r_s:
+ mnemonic = "cntd";
+ break;
+ case CNTH_r_s:
+ mnemonic = "cnth";
+ break;
+ case CNTW_r_s:
+ mnemonic = "cntw";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPAccumulatingReduction(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFPAccumulatingReduction)";
+
+ switch (instr->Mask(SVEFPAccumulatingReductionMask)) {
+ case FADDA_v_p_z:
+ mnemonic = "fadda";
+ form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPArithmeticUnpredicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t";
+
+ switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) {
+ case FADD_z_zz:
+ mnemonic = "fadd";
+ break;
+ case FMUL_z_zz:
+ mnemonic = "fmul";
+ break;
+ case FRECPS_z_zz:
+ mnemonic = "frecps";
+ break;
+ case FRSQRTS_z_zz:
+ mnemonic = "frsqrts";
+ break;
+ case FSUB_z_zz:
+ mnemonic = "fsub";
+ break;
+ case FTSMUL_z_zz:
+ mnemonic = "ftsmul";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPCompareVectors(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
+
+ switch (instr->Mask(SVEFPCompareVectorsMask)) {
+ case FACGE_p_p_zz:
+ mnemonic = "facge";
+ break;
+ case FACGT_p_p_zz:
+ mnemonic = "facgt";
+ break;
+ case FCMEQ_p_p_zz:
+ mnemonic = "fcmeq";
+ break;
+ case FCMGE_p_p_zz:
+ mnemonic = "fcmge";
+ break;
+ case FCMGT_p_p_zz:
+ mnemonic = "fcmgt";
+ break;
+ case FCMNE_p_p_zz:
+ mnemonic = "fcmne";
+ break;
+ case FCMUO_p_p_zz:
+ mnemonic = "fcmuo";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPCompareWithZero(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #0.0";
+
+ switch (instr->Mask(SVEFPCompareWithZeroMask)) {
+ case FCMEQ_p_p_z0:
+ mnemonic = "fcmeq";
+ break;
+ case FCMGE_p_p_z0:
+ mnemonic = "fcmge";
+ break;
+ case FCMGT_p_p_z0:
+ mnemonic = "fcmgt";
+ break;
+ case FCMLE_p_p_z0:
+ mnemonic = "fcmle";
+ break;
+ case FCMLT_p_p_z0:
+ mnemonic = "fcmlt";
+ break;
+ case FCMNE_p_p_z0:
+ mnemonic = "fcmne";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPComplexAddition(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFPComplexAddition)";
+
+ switch (instr->Mask(SVEFPComplexAdditionMask)) {
+ case FCADD_z_p_zz:
+ mnemonic = "fcadd";
+ if (instr->ExtractBit(16) == 0) {
+ form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #90";
+ } else {
+ form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #270";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPComplexMulAdd(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFPComplexMulAdd)";
+ const char *suffix = NULL;
+
+ const char *fcmla_constants[] = {"0", "90", "180", "270"};
+
+ switch (instr->Mask(SVEFPComplexMulAddMask)) {
+ case FCMLA_z_p_zzz:
+ mnemonic = "fcmla";
+ form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t, #";
+ suffix = fcmla_constants[instr->ExtractBits(14, 13)];
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVEFPComplexMulAddIndex(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFPComplexMulAddIndex)";
+
+ const char *fcmla_constants[] = {"0", "90", "180", "270"};
+ const char *suffix = fcmla_constants[instr->ExtractBits(11, 10)];
+
+ switch (instr->Mask(SVEFPComplexMulAddIndexMask)) {
+ case FCMLA_z_zzzi_h:
+ mnemonic = "fcmla";
+ form = "'Zd.h, 'Zn.h, z'u1816.h['u2019], #";
+ break;
+ case FCMLA_z_zzzi_s:
+ mnemonic = "fcmla";
+ form = "'Zd.s, 'Zn.s, z'u1916.s['u2020], #";
+ break;
+ default:
+ suffix = NULL;
+ break;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVEFPFastReduction(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'t'u0400, 'Pgl, 'Zn.'t";
+
+ switch (instr->Mask(SVEFPFastReductionMask)) {
+ case FADDV_v_p_z:
+ mnemonic = "faddv";
+ break;
+ case FMAXNMV_v_p_z:
+ mnemonic = "fmaxnmv";
+ break;
+ case FMAXV_v_p_z:
+ mnemonic = "fmaxv";
+ break;
+ case FMINNMV_v_p_z:
+ mnemonic = "fminnmv";
+ break;
+ case FMINV_v_p_z:
+ mnemonic = "fminv";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPMulIndex(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFPMulIndex)";
+
+ switch (instr->Mask(SVEFPMulIndexMask)) {
+ case FMUL_z_zzi_d:
+ mnemonic = "fmul";
+ form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]";
+ break;
+ case FMUL_z_zzi_h:
+ case FMUL_z_zzi_h_i3h:
+ mnemonic = "fmul";
+ form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]";
+ break;
+ case FMUL_z_zzi_s:
+ mnemonic = "fmul";
+ form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPMulAdd(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t";
+
+ switch (instr->Mask(SVEFPMulAddMask)) {
+ case FMAD_z_p_zzz:
+ mnemonic = "fmad";
+ break;
+ case FMLA_z_p_zzz:
+ mnemonic = "fmla";
+ break;
+ case FMLS_z_p_zzz:
+ mnemonic = "fmls";
+ break;
+ case FMSB_z_p_zzz:
+ mnemonic = "fmsb";
+ break;
+ case FNMAD_z_p_zzz:
+ mnemonic = "fnmad";
+ break;
+ case FNMLA_z_p_zzz:
+ mnemonic = "fnmla";
+ break;
+ case FNMLS_z_p_zzz:
+ mnemonic = "fnmls";
+ break;
+ case FNMSB_z_p_zzz:
+ mnemonic = "fnmsb";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPMulAddIndex(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEFPMulAddIndex)";
+
+ switch (instr->Mask(SVEFPMulAddIndexMask)) {
+ case FMLA_z_zzzi_d:
+ mnemonic = "fmla";
+ form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]";
+ break;
+ case FMLA_z_zzzi_s:
+ mnemonic = "fmla";
+ form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]";
+ break;
+ case FMLS_z_zzzi_d:
+ mnemonic = "fmls";
+ form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]";
+ break;
+ case FMLS_z_zzzi_s:
+ mnemonic = "fmls";
+ form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]";
+ break;
+ case FMLA_z_zzzi_h:
+ case FMLA_z_zzzi_h_i3h:
+ mnemonic = "fmla";
+ form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]";
+ break;
+ case FMLS_z_zzzi_h:
+ case FMLS_z_zzzi_h_i3h:
+ mnemonic = "fmls";
+ form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]";
+ break;
+ default:
+ break;
+ }
+
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEFPUnaryOpUnpredicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Zn.'t";
+
+ switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) {
+ case FRECPE_z_z:
+ mnemonic = "frecpe";
+ break;
+ case FRSQRTE_z_z:
+ mnemonic = "frsqrte";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIncDecByPredicateCount(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEIncDecByPredicateCount)";
+
+ switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
+ case DECP_r_p_r:
+ case DECP_z_p_z:
+ mnemonic = "decp";
+ break;
+ case INCP_r_p_r:
+ case INCP_z_p_z:
+ mnemonic = "incp";
+ break;
+ case SQDECP_r_p_r_sx:
+ case SQDECP_r_p_r_x:
+ case SQDECP_z_p_z:
+ mnemonic = "sqdecp";
+ break;
+ case SQINCP_r_p_r_sx:
+ case SQINCP_r_p_r_x:
+ case SQINCP_z_p_z:
+ mnemonic = "sqincp";
+ break;
+ case UQDECP_r_p_r_uw:
+ case UQDECP_r_p_r_x:
+ case UQDECP_z_p_z:
+ mnemonic = "uqdecp";
+ break;
+ case UQINCP_r_p_r_uw:
+ case UQINCP_r_p_r_x:
+ case UQINCP_z_p_z:
+ mnemonic = "uqincp";
+ break;
+ default:
+ break;
+ }
+
+ switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
+ // <Xdn>, <Pg>.<T>
+ case DECP_r_p_r:
+ case INCP_r_p_r:
+ form = "'Xd, 'Pn.'t";
+ break;
+ // <Zdn>.<T>, <Pg>
+ case DECP_z_p_z:
+ case INCP_z_p_z:
+ case SQDECP_z_p_z:
+ case SQINCP_z_p_z:
+ case UQDECP_z_p_z:
+ case UQINCP_z_p_z:
+ form = "'Zd.'t, 'Pn";
+ break;
+ // <Xdn>, <Pg>.<T>, <Wdn>
+ case SQDECP_r_p_r_sx:
+ case SQINCP_r_p_r_sx:
+ form = "'Xd, 'Pn.'t, 'Wd";
+ break;
+ // <Xdn>, <Pg>.<T>
+ case SQDECP_r_p_r_x:
+ case SQINCP_r_p_r_x:
+ case UQDECP_r_p_r_x:
+ case UQINCP_r_p_r_x:
+ form = "'Xd, 'Pn.'t";
+ break;
+ // <Wdn>, <Pg>.<T>
+ case UQDECP_r_p_r_uw:
+ case UQINCP_r_p_r_uw:
+ form = "'Wd, 'Pn.'t";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
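+
+// Illustrative only: the second switch selects the operand list per variant,
+// giving output such as
+//   incp x0, p1.h            // INCP_r_p_r
+//   decp z0.d, p1            // DECP_z_p_z
+//   sqincp x0, p1.h, w0      // SQINCP_r_p_r_sx
+//   uqdecp w0, p1.s          // UQDECP_r_p_r_uw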
+
+void Disassembler::VisitSVEIndexGeneration(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEIndexGeneration)";
+
+ bool w_inputs =
+ static_cast<unsigned>(instr->GetSVESize()) <= kWRegSizeInBytesLog2;
+
+ switch (instr->Mask(SVEIndexGenerationMask)) {
+ case INDEX_z_ii:
+ mnemonic = "index";
+ form = "'Zd.'t, #'s0905, #'s2016";
+ break;
+ case INDEX_z_ir:
+ mnemonic = "index";
+ form = w_inputs ? "'Zd.'t, #'s0905, 'Wm" : "'Zd.'t, #'s0905, 'Xm";
+ break;
+ case INDEX_z_ri:
+ mnemonic = "index";
+ form = w_inputs ? "'Zd.'t, 'Wn, #'s2016" : "'Zd.'t, 'Xn, #'s2016";
+ break;
+ case INDEX_z_rr:
+ mnemonic = "index";
+ form = w_inputs ? "'Zd.'t, 'Wn, 'Wm" : "'Zd.'t, 'Xn, 'Xm";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
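+
+// Illustrative only: the immediate/register mix follows the four variants,
+// with W registers used for lane sizes up to .s, e.g.
+//   index z0.b, #-8, #7       // INDEX_z_ii
+//   index z1.s, #2, w3        // INDEX_z_ir
+//   index z2.d, x4, #-5       // INDEX_z_ri
+//   index z3.h, w5, w6        // INDEX_z_rr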
+
+void Disassembler::VisitSVEIntArithmeticUnpredicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t";
+
+ switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) {
+ case ADD_z_zz:
+ mnemonic = "add";
+ break;
+ case SQADD_z_zz:
+ mnemonic = "sqadd";
+ break;
+ case SQSUB_z_zz:
+ mnemonic = "sqsub";
+ break;
+ case SUB_z_zz:
+ mnemonic = "sub";
+ break;
+ case UQADD_z_zz:
+ mnemonic = "uqadd";
+ break;
+ case UQSUB_z_zz:
+ mnemonic = "uqsub";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntCompareSignedImm(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'s2016";
+
+ switch (instr->Mask(SVEIntCompareSignedImmMask)) {
+ case CMPEQ_p_p_zi:
+ mnemonic = "cmpeq";
+ break;
+ case CMPGE_p_p_zi:
+ mnemonic = "cmpge";
+ break;
+ case CMPGT_p_p_zi:
+ mnemonic = "cmpgt";
+ break;
+ case CMPLE_p_p_zi:
+ mnemonic = "cmple";
+ break;
+ case CMPLT_p_p_zi:
+ mnemonic = "cmplt";
+ break;
+ case CMPNE_p_p_zi:
+ mnemonic = "cmpne";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntCompareUnsignedImm(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'u2014";
+
+ switch (instr->Mask(SVEIntCompareUnsignedImmMask)) {
+ case CMPHI_p_p_zi:
+ mnemonic = "cmphi";
+ break;
+ case CMPHS_p_p_zi:
+ mnemonic = "cmphs";
+ break;
+ case CMPLO_p_p_zi:
+ mnemonic = "cmplo";
+ break;
+ case CMPLS_p_p_zi:
+ mnemonic = "cmpls";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntCompareVectors(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.d";
+
+ switch (instr->Mask(SVEIntCompareVectorsMask)) {
+ case CMPEQ_p_p_zw:
+ mnemonic = "cmpeq";
+ break;
+ case CMPEQ_p_p_zz:
+ mnemonic = "cmpeq";
+ form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
+ break;
+ case CMPGE_p_p_zw:
+ mnemonic = "cmpge";
+ break;
+ case CMPGE_p_p_zz:
+ mnemonic = "cmpge";
+ form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
+ break;
+ case CMPGT_p_p_zw:
+ mnemonic = "cmpgt";
+ break;
+ case CMPGT_p_p_zz:
+ mnemonic = "cmpgt";
+ form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
+ break;
+ case CMPHI_p_p_zw:
+ mnemonic = "cmphi";
+ break;
+ case CMPHI_p_p_zz:
+ mnemonic = "cmphi";
+ form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
+ break;
+ case CMPHS_p_p_zw:
+ mnemonic = "cmphs";
+ break;
+ case CMPHS_p_p_zz:
+ mnemonic = "cmphs";
+ form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
+ break;
+ case CMPLE_p_p_zw:
+ mnemonic = "cmple";
+ break;
+ case CMPLO_p_p_zw:
+ mnemonic = "cmplo";
+ break;
+ case CMPLS_p_p_zw:
+ mnemonic = "cmpls";
+ break;
+ case CMPLT_p_p_zw:
+ mnemonic = "cmplt";
+ break;
+ case CMPNE_p_p_zw:
+ mnemonic = "cmpne";
+ break;
+ case CMPNE_p_p_zz:
+ mnemonic = "cmpne";
+ form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntMulAddPredicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEIntMulAddPredicated)";
+
+ switch (instr->Mask(SVEIntMulAddPredicatedMask)) {
+ case MAD_z_p_zzz:
+ mnemonic = "mad";
+ form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t";
+ break;
+ case MLA_z_p_zzz:
+ mnemonic = "mla";
+ form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t";
+ break;
+ case MLS_z_p_zzz:
+ mnemonic = "mls";
+ form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t";
+ break;
+ case MSB_z_p_zzz:
+ mnemonic = "msb";
+ form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntMulAddUnpredicated(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEIntMulAddUnpredicated)";
+
+ if (static_cast<unsigned>(instr->GetSVESize()) >= kSRegSizeInBytesLog2) {
+ form = "'Zd.'t, 'Zn.'tq, 'Zm.'tq";
+ switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) {
+ case SDOT_z_zzz:
+ mnemonic = "sdot";
+ break;
+ case UDOT_z_zzz:
+ mnemonic = "udot";
+ break;
+ default:
+ break;
+ }
+ }
+
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEMovprfx(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEMovprfx)";
+
+ if (instr->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z) {
+ mnemonic = "movprfx";
+ form = "'Zd.'t, 'Pgl/'?16:mz, 'Zn.'t";
+ }
+
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntReduction(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vdv, 'Pgl, 'Zn.'t";
+
+ if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) {
+ switch (instr->Mask(SVEIntReductionLogicalMask)) {
+ case ANDV_r_p_z:
+ mnemonic = "andv";
+ break;
+ case EORV_r_p_z:
+ mnemonic = "eorv";
+ break;
+ case ORV_r_p_z:
+ mnemonic = "orv";
+ break;
+ default:
+ break;
+ }
+ } else {
+ switch (instr->Mask(SVEIntReductionMask)) {
+ case SADDV_r_p_z:
+ mnemonic = "saddv";
+ form = "'Dd, 'Pgl, 'Zn.'t";
+ break;
+ case SMAXV_r_p_z:
+ mnemonic = "smaxv";
+ break;
+ case SMINV_r_p_z:
+ mnemonic = "sminv";
+ break;
+ case UADDV_r_p_z:
+ mnemonic = "uaddv";
+ form = "'Dd, 'Pgl, 'Zn.'t";
+ break;
+ case UMAXV_r_p_z:
+ mnemonic = "umaxv";
+ break;
+ case UMINV_r_p_z:
+ mnemonic = "uminv";
+ break;
+ default:
+ break;
+ }
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEIntUnaryArithmeticPredicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t";
+
+ switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) {
+ case ABS_z_p_z:
+ mnemonic = "abs";
+ break;
+ case CLS_z_p_z:
+ mnemonic = "cls";
+ break;
+ case CLZ_z_p_z:
+ mnemonic = "clz";
+ break;
+ case CNOT_z_p_z:
+ mnemonic = "cnot";
+ break;
+ case CNT_z_p_z:
+ mnemonic = "cnt";
+ break;
+ case FABS_z_p_z:
+ mnemonic = "fabs";
+ break;
+ case FNEG_z_p_z:
+ mnemonic = "fneg";
+ break;
+ case NEG_z_p_z:
+ mnemonic = "neg";
+ break;
+ case NOT_z_p_z:
+ mnemonic = "not";
+ break;
+ case SXTB_z_p_z:
+ mnemonic = "sxtb";
+ break;
+ case SXTH_z_p_z:
+ mnemonic = "sxth";
+ break;
+ case SXTW_z_p_z:
+ mnemonic = "sxtw";
+ break;
+ case UXTB_z_p_z:
+ mnemonic = "uxtb";
+ break;
+ case UXTH_z_p_z:
+ mnemonic = "uxth";
+ break;
+ case UXTW_z_p_z:
+ mnemonic = "uxtw";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEMulIndex(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEMulIndex)";
+
+ switch (instr->Mask(SVEMulIndexMask)) {
+ case SDOT_z_zzzi_d:
+ mnemonic = "sdot";
+ form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]";
+ break;
+ case SDOT_z_zzzi_s:
+ mnemonic = "sdot";
+ form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]";
+ break;
+ case UDOT_z_zzzi_d:
+ mnemonic = "udot";
+ form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]";
+ break;
+ case UDOT_z_zzzi_s:
+ mnemonic = "udot";
+ form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPermuteVectorExtract(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEPermuteVectorExtract)";
+
+ switch (instr->Mask(SVEPermuteVectorExtractMask)) {
+ case EXT_z_zi_des:
+ mnemonic = "ext";
+ form = "'Zd.b, 'Zd.b, 'Zn.b, #'u2016:1210";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPermuteVectorInterleaving(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t";
+
+ switch (instr->Mask(SVEPermuteVectorInterleavingMask)) {
+ case TRN1_z_zz:
+ mnemonic = "trn1";
+ break;
+ case TRN2_z_zz:
+ mnemonic = "trn2";
+ break;
+ case UZP1_z_zz:
+ mnemonic = "uzp1";
+ break;
+ case UZP2_z_zz:
+ mnemonic = "uzp2";
+ break;
+ case ZIP1_z_zz:
+ mnemonic = "zip1";
+ break;
+ case ZIP2_z_zz:
+ mnemonic = "zip2";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPredicateCount(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEPredicateCount)";
+
+ switch (instr->Mask(SVEPredicateCountMask)) {
+ case CNTP_r_p_p:
+ mnemonic = "cntp";
+ form = "'Xd, p'u1310, 'Pn.'t";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPredicateLogical(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b";
+
+ int pd = instr->GetPd();
+ int pn = instr->GetPn();
+ int pm = instr->GetPm();
+ int pg = instr->ExtractBits(13, 10);
+
+ switch (instr->Mask(SVEPredicateLogicalMask)) {
+ case ANDS_p_p_pp_z:
+ mnemonic = "ands";
+ if (pn == pm) {
+ mnemonic = "movs";
+ form = "'Pd.b, p'u1310/z, 'Pn.b";
+ }
+ break;
+ case AND_p_p_pp_z:
+ mnemonic = "and";
+ if (pn == pm) {
+ mnemonic = "mov";
+ form = "'Pd.b, p'u1310/z, 'Pn.b";
+ }
+ break;
+ case BICS_p_p_pp_z:
+ mnemonic = "bics";
+ break;
+ case BIC_p_p_pp_z:
+ mnemonic = "bic";
+ break;
+ case EORS_p_p_pp_z:
+ mnemonic = "eors";
+ if (pm == pg) {
+ mnemonic = "nots";
+ form = "'Pd.b, 'Pm/z, 'Pn.b";
+ }
+ break;
+ case EOR_p_p_pp_z:
+ mnemonic = "eor";
+ if (pm == pg) {
+ mnemonic = "not";
+ form = "'Pd.b, 'Pm/z, 'Pn.b";
+ }
+ break;
+ case NANDS_p_p_pp_z:
+ mnemonic = "nands";
+ break;
+ case NAND_p_p_pp_z:
+ mnemonic = "nand";
+ break;
+ case NORS_p_p_pp_z:
+ mnemonic = "nors";
+ break;
+ case NOR_p_p_pp_z:
+ mnemonic = "nor";
+ break;
+ case ORNS_p_p_pp_z:
+ mnemonic = "orns";
+ break;
+ case ORN_p_p_pp_z:
+ mnemonic = "orn";
+ break;
+ case ORRS_p_p_pp_z:
+ mnemonic = "orrs";
+ if ((pn == pm) && (pn == pg)) {
+ mnemonic = "movs";
+ form = "'Pd.b, 'Pn.b";
+ }
+ break;
+ case ORR_p_p_pp_z:
+ mnemonic = "orr";
+ if ((pn == pm) && (pn == pg)) {
+ mnemonic = "mov";
+ form = "'Pd.b, 'Pn.b";
+ }
+ break;
+ case SEL_p_p_pp:
+ if (pd == pm) {
+ mnemonic = "mov";
+ form = "'Pd.b, p'u1310/m, 'Pn.b";
+ } else {
+ mnemonic = "sel";
+ form = "'Pd.b, p'u1310, 'Pn.b, 'Pm.b";
+ }
+ break;
+ default:
+ form = "(SVEPredicateLogical)";
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPredicateInitialize(const Instruction *instr) {
+ // This group only contains PTRUE{S}, and there are no unallocated encodings.
+ VIXL_STATIC_ASSERT(
+ SVEPredicateInitializeMask ==
+ (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit));
+ VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) ||
+ (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s));
+
+ const char *mnemonic = instr->ExtractBit(16) ? "ptrues" : "ptrue";
+ const char *form = "'Pd.'t, 'Ipc";
+ // Omit the pattern if it is the default ('ALL').
+ if (instr->ExtractBits(9, 5) == SVE_ALL) form = "'Pd.'t";
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPredicateNextActive(const Instruction *instr) {
+ // This group only contains PNEXT, and there are no unallocated encodings.
+ VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask);
+ VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p);
+
+ Format(instr, "pnext", "'Pd.'t, 'Pn, 'Pd.'t");
+}
+
+void Disassembler::VisitSVEPredicateReadFromFFR_Predicated(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEPredicateReadFromFFR_Predicated)";
+ switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) {
+ case RDFFR_p_p_f:
+ case RDFFRS_p_p_f:
+ mnemonic = instr->ExtractBit(22) ? "rdffrs" : "rdffr";
+ form = "'Pd.b, 'Pn/z";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEPropagateBreak(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b";
+
+ switch (instr->Mask(SVEPropagateBreakMask)) {
+ case BRKPAS_p_p_pp:
+ mnemonic = "brkpas";
+ break;
+ case BRKPA_p_p_pp:
+ mnemonic = "brkpa";
+ break;
+ case BRKPBS_p_p_pp:
+ mnemonic = "brkpbs";
+ break;
+ case BRKPB_p_p_pp:
+ mnemonic = "brkpb";
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEStackFrameAdjustment(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Xds, 'Xms, #'s1005";
+
+ switch (instr->Mask(SVEStackFrameAdjustmentMask)) {
+ case ADDPL_r_ri:
+ mnemonic = "addpl";
+ break;
+ case ADDVL_r_ri:
+ mnemonic = "addvl";
+ break;
+ default:
+ form = "(SVEStackFrameAdjustment)";
+ break;
+ }
+
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEStackFrameSize(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEStackFrameSize)";
+
+ switch (instr->Mask(SVEStackFrameSizeMask)) {
+ case RDVL_r_i:
+ mnemonic = "rdvl";
+ form = "'Xd, #'s1005";
+ break;
+ default:
+ break;
+ }
+
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEVectorSelect(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "(SVEVectorSelect)";
+
+ switch (instr->Mask(SVEVectorSelectMask)) {
+ case SEL_z_p_zz:
+ if (instr->GetRd() == instr->GetRm()) {
+ mnemonic = "mov";
+ form = "'Zd.'t, p'u1310/m, 'Zn.'t";
+ } else {
+ mnemonic = "sel";
+ form = "'Zd.'t, p'u1310, 'Zn.'t, 'Zm.'t";
+ }
+ break;
+ default:
+ break;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::VisitSVEContiguousLoad_ScalarPlusImm(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns";
+ const char *suffix =
+ (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]";
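+  // Illustrative output for these forms (register numbers are arbitrary):
+  //   ld1b {z0.b}, p1/z, [x2]
+  //   ld1w {z3.s}, p2/z, [x5, #-2, mul vl]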
+
+ switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) {
+ case LD1B_z_p_bi_u16:
+ case LD1B_z_p_bi_u32:
+ case LD1B_z_p_bi_u64:
+ case LD1B_z_p_bi_u8:
+ mnemonic = "ld1b";
+ break;
+ case LD1D_z_p_bi_u64:
+ mnemonic = "ld1d";
+ break;
+ case LD1H_z_p_bi_u16:
+ case LD1H_z_p_bi_u32:
+ case LD1H_z_p_bi_u64:
+ mnemonic = "ld1h";
+ break;
+ case LD1SB_z_p_bi_s16:
+ case LD1SB_z_p_bi_s32:
+ case LD1SB_z_p_bi_s64:
+ mnemonic = "ld1sb";
+ break;
+ case LD1SH_z_p_bi_s32:
+ case LD1SH_z_p_bi_s64:
+ mnemonic = "ld1sh";
+ break;
+ case LD1SW_z_p_bi_s64:
+ mnemonic = "ld1sw";
+ break;
+ case LD1W_z_p_bi_u32:
+ case LD1W_z_p_bi_u64:
+ mnemonic = "ld1w";
+ break;
+ default:
+ form = "(SVEContiguousLoad_ScalarPlusImm)";
+ suffix = NULL;
+ break;
+ }
+
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::VisitSVEContiguousLoad_ScalarPlusScalar(
+ const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns, 'Xm";
+ const char *suffix = NULL;
+
+ switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
+ case LD1B_z_p_br_u16:
+ case LD1B_z_p_br_u32:
+ case LD1B_z_p_br_u64:
+ case LD1B_z_p_br_u8:
+ mnemonic = "ld1b";
+ suffix = "]";
+ break;
+ case LD1D_z_p_br_u64:
+ mnemonic = "ld1d";
+ suffix = ", lsl #'u2423]";
+ break;
+ case LD1H_z_p_br_u16:
+ case LD1H_z_p_br_u32:
+ case LD1H_z_p_br_u64:
+ mnemonic = "ld1h";
+ suffix = ", lsl #'u2423]";
+ break;
+ case LD1SB_z_p_br_s16:
+ case LD1SB_z_p_br_s32:
+ case LD1SB_z_p_br_s64:
+ mnemonic = "ld1sb";
+ suffix = "]";
+ break;
+ case LD1SH_z_p_br_s32:
+ case LD1SH_z_p_br_s64:
+ mnemonic = "ld1sh";
+ suffix = ", lsl #1]";
+ break;
+ case LD1SW_z_p_br_s64:
+ mnemonic = "ld1sw";
+ suffix = ", lsl #2]";
+ break;
+ case LD1W_z_p_br_u32:
+ case LD1W_z_p_br_u64:
+ mnemonic = "ld1w";
+ suffix = ", lsl #'u2423]";
+ break;
+ default:
+ form = "(SVEContiguousLoad_ScalarPlusScalar)";
+ suffix = NULL;
+ break;
+ }
+
+ Format(instr, mnemonic, form, suffix);
+}
void Disassembler::VisitReserved(const Instruction *instr) {
// UDF is the only instruction in this group, and the Decoder is precise.
@@ -5059,14 +9633,18 @@ int64_t Disassembler::CodeRelativeAddress(const void *addr) {
void Disassembler::Format(const Instruction *instr,
const char *mnemonic,
- const char *format) {
+ const char *format0,
+ const char *format1) {
VIXL_ASSERT(mnemonic != NULL);
ResetOutput();
Substitute(instr, mnemonic);
- if (format != NULL) {
+ if (format0 != NULL) {
VIXL_ASSERT(buffer_pos_ < buffer_size_);
buffer_[buffer_pos_++] = ' ';
- Substitute(instr, format);
+ Substitute(instr, format0);
+ if (format1 != NULL) {
+ Substitute(instr, format1);
+ }
}
VIXL_ASSERT(buffer_pos_ < buffer_size_);
buffer_[buffer_pos_] = 0;
@@ -5091,10 +9669,11 @@ void Disassembler::Substitute(const Instruction *instr, const char *string) {
int Disassembler::SubstituteField(const Instruction *instr,
const char *format) {
switch (format[0]) {
- // NB. The remaining substitution prefix characters are: GJKUZ.
- case 'R': // Register. X or W, selected by sf bit.
+ // NB. The remaining substitution prefix upper-case characters are: JU.
+ case 'R': // Register. X or W, selected by sf (or alternative) bit.
case 'F': // FP register. S or D, selected by type field.
case 'V': // Vector register, V, vector format.
+ case 'Z': // Scalable vector register.
case 'W':
case 'X':
case 'B':
@@ -5103,14 +9682,14 @@ int Disassembler::SubstituteField(const Instruction *instr,
case 'D':
case 'Q':
return SubstituteRegisterField(instr, format);
+ case 'P':
+ return SubstitutePredicateRegisterField(instr, format);
case 'I':
return SubstituteImmediateField(instr, format);
case 'L':
return SubstituteLiteralField(instr, format);
case 'N':
return SubstituteShiftField(instr, format);
- case 'P':
- return SubstitutePrefetchField(instr, format);
case 'C':
return SubstituteConditionField(instr, format);
case 'E':
@@ -5127,6 +9706,15 @@ int Disassembler::SubstituteField(const Instruction *instr,
return SubstituteCrField(instr, format);
case 'G':
return SubstituteSysOpField(instr, format);
+ case 'p':
+ return SubstitutePrefetchField(instr, format);
+ case 'u':
+ case 's':
+ return SubstituteIntField(instr, format);
+ case 't':
+ return SubstituteSVESize(instr, format);
+ case '?':
+ return SubstituteTernary(instr, format);
default: {
VIXL_UNREACHABLE();
return 1;
@@ -5134,55 +9722,20 @@ int Disassembler::SubstituteField(const Instruction *instr,
}
}
+std::pair<unsigned, unsigned> Disassembler::GetRegNumForField(
+ const Instruction *instr, char reg_prefix, const char *field) {
+ unsigned reg_num = UINT_MAX;
+ unsigned field_len = 1;
-int Disassembler::SubstituteRegisterField(const Instruction *instr,
- const char *format) {
- char reg_prefix = format[0];
- unsigned reg_num = 0;
- unsigned field_len = 2;
-
- switch (format[1]) {
+ switch (field[0]) {
case 'd':
reg_num = instr->GetRd();
- if (format[2] == 'q') {
- reg_prefix = instr->GetNEONQ() ? 'X' : 'W';
- field_len = 3;
- }
break;
case 'n':
reg_num = instr->GetRn();
break;
case 'm':
reg_num = instr->GetRm();
- switch (format[2]) {
- // Handle registers tagged with b (bytes), z (instruction), or
- // r (registers), used for address updates in
- // NEON load/store instructions.
- case 'r':
- case 'b':
- case 'z': {
- field_len = 3;
- char *eimm;
- int imm = static_cast<int>(strtol(&format[3], &eimm, 10));
- field_len += eimm - &format[3];
- if (reg_num == 31) {
- switch (format[2]) {
- case 'z':
- imm *= (1 << instr->GetNEONLSSize());
- break;
- case 'r':
- imm *= (instr->GetNEONQ() == 0) ? kDRegSizeInBytes
- : kQRegSizeInBytes;
- break;
- case 'b':
- break;
- }
- AppendToOutput("#%d", imm);
- return field_len;
- }
- break;
- }
- }
break;
case 'e':
// This is register Rm, but using a 4-bit specifier. Used in NEON
@@ -5197,72 +9750,121 @@ int Disassembler::SubstituteRegisterField(const Instruction *instr,
break;
case 't':
reg_num = instr->GetRt();
- if (format[0] == 'V') {
- if ((format[2] >= '2') && (format[2] <= '4')) {
- // Handle consecutive vector register specifiers Vt2, Vt3 and Vt4.
- reg_num = (reg_num + format[2] - '1') % 32;
- field_len = 3;
- }
- } else {
- if (format[2] == '2') {
- // Handle register specifier Rt2.
- reg_num = instr->GetRt2();
- field_len = 3;
- }
- }
break;
- case '(': {
- switch (format[2]) {
- case 's':
- reg_num = instr->GetRs();
- break;
- case 't':
- reg_num = instr->GetRt();
- break;
- default:
- VIXL_UNREACHABLE();
- }
+ default:
+ VIXL_UNREACHABLE();
+ }
- VIXL_ASSERT(format[3] == '+');
- int i = 4;
- int addition = 0;
- while (format[i] != ')') {
- VIXL_ASSERT((format[i] >= '0') && (format[i] <= '9'));
- addition *= 10;
- addition += format[i] - '0';
- ++i;
+ switch (field[1]) {
+ case '2':
+ case '3':
+ case '4':
+ if ((reg_prefix == 'V') || (reg_prefix == 'Z')) { // Vt2/3/4, Zt2/3/4
+ VIXL_ASSERT(field[0] == 't');
+ reg_num = (reg_num + field[1] - '1') % 32;
+ field_len++;
+ } else {
+ VIXL_ASSERT((field[0] == 't') && (field[1] == '2'));
+ reg_num = instr->GetRt2();
+ field_len++;
}
- reg_num += addition;
- field_len = i + 1;
break;
+ case '+': // Rt+, Rs+ (ie. Rt + 1, Rs + 1)
+ VIXL_ASSERT((reg_prefix == 'W') || (reg_prefix == 'X'));
+ VIXL_ASSERT((field[0] == 's') || (field[0] == 't'));
+ reg_num++;
+ field_len++;
+ break;
+ case 's': // Core registers that are (w)sp rather than zr.
+ VIXL_ASSERT((reg_prefix == 'W') || (reg_prefix == 'X'));
+ reg_num = (reg_num == kZeroRegCode) ? kSPRegInternalCode : reg_num;
+ field_len++;
+ break;
+ }
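+  // For illustration: a field of "ds" (as in 'Xds) selects Rd and then maps
+  // register 31 to sp rather than zr, while "t2" (as in 'Xt2) selects Rt2.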
+
+ VIXL_ASSERT(reg_num != UINT_MAX);
+ return std::make_pair(reg_num, field_len);
+}
+
+int Disassembler::SubstituteRegisterField(const Instruction *instr,
+ const char *format) {
+ unsigned field_len = 1; // Initially, count only the first character.
+
+ // The first character of the register format field, eg R, X, S, etc.
+ char reg_prefix = format[0];
+
+ // Pointer to the character after the prefix. This may be one of the standard
+ // symbols representing a register encoding, or a two digit bit position,
+ // handled by the following code.
+ const char *reg_field = &format[1];
+
+ if (reg_prefix == 'R') {
+ bool is_x = instr->GetSixtyFourBits();
+ if (strspn(reg_field, "0123456789") == 2) { // r20d, r31n, etc.
+ // Core W or X registers where the type is determined by a specified bit
+ // position, eg. 'R20d, 'R05n. This is like the 'Rd syntax, where bit 31
+ // is implicitly used to select between W and X.
+ int bitpos = ((reg_field[0] - '0') * 10) + (reg_field[1] - '0');
+ VIXL_ASSERT(bitpos <= 31);
+ is_x = (instr->ExtractBit(bitpos) == 1);
+ reg_field = &format[3];
+ field_len += 2;
}
- default:
- VIXL_UNREACHABLE();
+ reg_prefix = is_x ? 'X' : 'W';
}
- // Increase field length for registers tagged as stack.
- if (format[1] != '(' && format[2] == 's') {
- field_len = 3;
+ std::pair<unsigned, unsigned> rn =
+ GetRegNumForField(instr, reg_prefix, reg_field);
+ unsigned reg_num = rn.first;
+ field_len += rn.second;
+
+ if (reg_field[0] == 'm') {
+ switch (reg_field[1]) {
+ // Handle registers tagged with b (bytes), z (instruction), or
+ // r (registers), used for address updates in NEON load/store
+ // instructions.
+ case 'r':
+ case 'b':
+ case 'z': {
+ VIXL_ASSERT(reg_prefix == 'X');
+ field_len = 3;
+ char *eimm;
+ int imm = static_cast<int>(strtol(&reg_field[2], &eimm, 10));
+ field_len += eimm - &reg_field[2];
+ if (reg_num == 31) {
+ switch (reg_field[1]) {
+ case 'z':
+ imm *= (1 << instr->GetNEONLSSize());
+ break;
+ case 'r':
+ imm *= (instr->GetNEONQ() == 0) ? kDRegSizeInBytes
+ : kQRegSizeInBytes;
+ break;
+ case 'b':
+ break;
+ }
+ AppendToOutput("#%d", imm);
+ return field_len;
+ }
+ break;
+ }
+ }
}
CPURegister::RegisterType reg_type = CPURegister::kRegister;
unsigned reg_size = kXRegSize;
- switch (reg_prefix) {
- case 'R':
- reg_prefix = instr->GetSixtyFourBits() ? 'X' : 'W';
- break;
- case 'F':
- switch (instr->GetFPType()) {
- case 3:
- reg_prefix = 'H';
- break;
- case 0:
- reg_prefix = 'S';
- break;
- default:
- reg_prefix = 'D';
- }
+ if (reg_prefix == 'F') {
+ switch (instr->GetFPType()) {
+ case 3:
+ reg_prefix = 'H';
+ break;
+ case 0:
+ reg_prefix = 'S';
+ break;
+ default:
+ reg_prefix = 'D';
+ }
}
switch (reg_prefix) {
@@ -5295,22 +9897,51 @@ int Disassembler::SubstituteRegisterField(const Instruction *instr,
reg_size = kQRegSize;
break;
case 'V':
+ if (reg_field[1] == 'v') {
+ reg_type = CPURegister::kVRegister;
+ reg_size = 1 << (instr->GetSVESize() + 3);
+ field_len++;
+ break;
+ }
AppendToOutput("v%d", reg_num);
return field_len;
+ case 'Z':
+ AppendToOutput("z%d", reg_num);
+ return field_len;
default:
VIXL_UNREACHABLE();
}
- if ((reg_type == CPURegister::kRegister) && (reg_num == kZeroRegCode) &&
- (format[2] == 's')) {
- reg_num = kSPRegInternalCode;
- }
-
AppendRegisterNameToOutput(instr, CPURegister(reg_num, reg_size, reg_type));
return field_len;
}
+int Disassembler::SubstitutePredicateRegisterField(const Instruction *instr,
+ const char *format) {
+ VIXL_ASSERT(format[0] == 'P');
+ switch (format[1]) {
+    // This field only supports P registers that are always encoded in the
+    // same position.
+ case 'd':
+ case 't':
+ AppendToOutput("p%u", instr->GetPt());
+ break;
+ case 'n':
+ AppendToOutput("p%u", instr->GetPn());
+ break;
+ case 'm':
+ AppendToOutput("p%u", instr->GetPm());
+ break;
+ case 'g':
+ VIXL_ASSERT(format[2] == 'l');
+ AppendToOutput("p%u", instr->GetPgLow8());
+ return 3;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ return 2;
+}
int Disassembler::SubstituteImmediateField(const Instruction *instr,
const char *format) {
@@ -5391,36 +10022,92 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
return 6;
}
case 'A': { // IAddSub.
- VIXL_ASSERT(instr->GetShiftAddSub() <= 1);
- int64_t imm = instr->GetImmAddSub() << (12 * instr->GetShiftAddSub());
+ int64_t imm = instr->GetImmAddSub() << (12 * instr->GetImmAddSubShift());
AppendToOutput("#0x%" PRIx64 " (%" PRId64 ")", imm, imm);
return 7;
}
- case 'F': { // IFPHalf, IFPSingle, IFPDouble, or IFPFBits.
- if (format[3] == 'F') { // IFPFbits.
- AppendToOutput("#%" PRId32, 64 - instr->GetFPScale());
- return 8;
- } else {
- AppendToOutput("#0x%" PRIx32 " (%.4f)",
- instr->GetImmFP(),
- format[3] == 'H'
- ? FPToFloat(instr->GetImmFP16(), kIgnoreDefaultNaN)
- : (format[3] == 'S') ? instr->GetImmFP32()
- : instr->GetImmFP64());
- if (format[3] == 'H') {
- return 7;
- } else {
- return 9;
- }
+ case 'F': { // IFP, IFPNeon, IFPSve or IFPFBits.
+ int imm8 = 0;
+ int len = strlen("IFP");
+ switch (format[3]) {
+ case 'F':
+ VIXL_ASSERT(strncmp(format, "IFPFBits", strlen("IFPFBits")) == 0);
+ AppendToOutput("#%" PRId32, 64 - instr->GetFPScale());
+ return strlen("IFPFBits");
+ case 'N':
+ VIXL_ASSERT(strncmp(format, "IFPNeon", strlen("IFPNeon")) == 0);
+ imm8 = instr->GetImmNEONabcdefgh();
+ len += strlen("Neon");
+ break;
+ case 'S':
+ VIXL_ASSERT(strncmp(format, "IFPSve", strlen("IFPSve")) == 0);
+ imm8 = instr->ExtractBits(12, 5);
+ len += strlen("Sve");
+ break;
+ default:
+ VIXL_ASSERT(strncmp(format, "IFP", strlen("IFP")) == 0);
+ imm8 = instr->GetImmFP();
+ break;
}
+ AppendToOutput("#0x%" PRIx32 " (%.4f)",
+ imm8,
+ Instruction::Imm8ToFP32(imm8));
+ return len;
}
case 'H': { // IH - ImmHint
AppendToOutput("#%" PRId32, instr->GetImmHint());
return 2;
}
case 'T': { // ITri - Immediate Triangular Encoded.
- AppendToOutput("#0x%" PRIx64, instr->GetImmLogical());
- return 4;
+ if (format[4] == 'S') {
+ VIXL_ASSERT((format[5] == 'v') && (format[6] == 'e'));
+ switch (format[7]) {
+ case 'l':
+ // SVE logical immediate encoding.
+ AppendToOutput("#0x%" PRIx64, instr->GetSVEImmLogical());
+ return 8;
+ case 'p': {
+ // SVE predicated shift immediate encoding, lsl.
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(
+ /* is_predicated = */ true);
+ int lane_bits = 8 << shift_and_lane_size.second;
+ AppendToOutput("#%" PRId32, lane_bits - shift_and_lane_size.first);
+ return 8;
+ }
+ case 'q': {
+ // SVE predicated shift immediate encoding, asr and lsr.
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(
+ /* is_predicated = */ true);
+ AppendToOutput("#%" PRId32, shift_and_lane_size.first);
+ return 8;
+ }
+ case 'r': {
+ // SVE unpredicated shift immediate encoding, lsl.
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(
+ /* is_predicated = */ false);
+ int lane_bits = 8 << shift_and_lane_size.second;
+ AppendToOutput("#%" PRId32, lane_bits - shift_and_lane_size.first);
+ return 8;
+ }
+ case 's': {
+ // SVE unpredicated shift immediate encoding, asr and lsr.
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(
+ /* is_predicated = */ false);
+ AppendToOutput("#%" PRId32, shift_and_lane_size.first);
+ return 8;
+ }
+ default:
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+ } else {
+ AppendToOutput("#0x%" PRIx64, instr->GetImmLogical());
+ return 4;
+ }
}
case 'N': { // INzcv.
int nzcv = (instr->GetNzcv() << Flags_offset);
@@ -5442,12 +10129,21 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
AppendToOutput("#%" PRId32, instr->GetImmS());
return 8;
}
- case 'S': { // IS - Test and branch bit.
+ case 't': { // It - Test and branch bit.
AppendToOutput("#%" PRId32,
(instr->GetImmTestBranchBit5() << 5) |
instr->GetImmTestBranchBit40());
return 2;
}
+ case 'S': { // ISveSvl - SVE 'mul vl' immediate for structured ld/st.
+ VIXL_ASSERT(strncmp(format, "ISveSvl", 7) == 0);
+ int imm = instr->ExtractSignedBits(19, 16);
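+      // The offset is scaled by the number of registers in the structure,
+      // e.g. a three-register form with an encoded offset of 2 prints
+      // ", #6, mul vl".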
+ if (imm != 0) {
+ int reg_count = instr->ExtractBits(22, 21) + 1;
+ AppendToOutput(", #%d, mul vl", imm * reg_count);
+ }
+ return 7;
+ }
case 's': { // Is - Shift (immediate).
switch (format[2]) {
case '1': { // Is1 - SSHR.
@@ -5539,6 +10235,13 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
}
}
return 0;
+ } else if (strncmp(format,
+ "IVInsSVEIndex",
+ strlen("IVInsSVEIndex")) == 0) {
+ std::pair<int, int> index_and_lane_size =
+ instr->GetSVEPermuteIndexAndLaneSizeLog2();
+ AppendToOutput("%d", index_and_lane_size.first);
+ return strlen("IVInsSVEIndex");
}
VIXL_FALLTHROUGH();
}
@@ -5547,27 +10250,7 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
return 9;
}
case 'M': { // Modified Immediate cases.
- if (strncmp(format, "IVMIImmFPHalf", strlen("IVMIImmFPHalf")) == 0) {
- AppendToOutput("#0x%" PRIx32 " (%.4f)",
- instr->GetImmNEONabcdefgh(),
- FPToFloat(instr->GetImmNEONFP16(),
- kIgnoreDefaultNaN));
- return strlen("IVMIImmFPHalf");
- } else if (strncmp(format,
- "IVMIImmFPSingle",
- strlen("IVMIImmFPSingle")) == 0) {
- AppendToOutput("#0x%" PRIx32 " (%.4f)",
- instr->GetImmNEONabcdefgh(),
- instr->GetImmNEONFP32());
- return strlen("IVMIImmFPSingle");
- } else if (strncmp(format,
- "IVMIImmFPDouble",
- strlen("IVMIImmFPDouble")) == 0) {
- AppendToOutput("#0x%" PRIx32 " (%.4f)",
- instr->GetImmNEONabcdefgh(),
- instr->GetImmNEONFP64());
- return strlen("IVMIImmFPDouble");
- } else if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) {
+ if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) {
uint64_t imm8 = instr->GetImmNEONabcdefgh();
AppendToOutput("#0x%" PRIx64, imm8);
return strlen("IVMIImm8");
@@ -5647,6 +10330,48 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
}
}
}
+ case 'p': { // Ipc - SVE predicate constraint specifier.
+ VIXL_ASSERT(format[2] == 'c');
+ unsigned pattern = instr->GetImmSVEPredicateConstraint();
+ switch (pattern) {
+ // VL1-VL8 are encoded directly.
+ case SVE_VL1:
+ case SVE_VL2:
+ case SVE_VL3:
+ case SVE_VL4:
+ case SVE_VL5:
+ case SVE_VL6:
+ case SVE_VL7:
+ case SVE_VL8:
+ AppendToOutput("vl%u", pattern);
+ break;
+ // VL16-VL256 are encoded as log2(N) + c.
+ case SVE_VL16:
+ case SVE_VL32:
+ case SVE_VL64:
+ case SVE_VL128:
+ case SVE_VL256:
+ AppendToOutput("vl%u", 16 << (pattern - SVE_VL16));
+ break;
+ // Special cases.
+ case SVE_POW2:
+ AppendToOutput("pow2");
+ break;
+ case SVE_MUL4:
+ AppendToOutput("mul4");
+ break;
+ case SVE_MUL3:
+ AppendToOutput("mul3");
+ break;
+ case SVE_ALL:
+ AppendToOutput("all");
+ break;
+ default:
+ AppendToOutput("#0x%x", pattern);
+ break;
+ }
+ return 3;
+ }
default: {
VIXL_UNIMPLEMENTED();
return 0;
@@ -5736,11 +10461,11 @@ int Disassembler::SubstituteShiftField(const Instruction *instr,
VIXL_ASSERT(instr->GetShiftDP() <= 0x3);
switch (format[1]) {
- case 'D': { // HDP.
+ case 'D': { // NDP.
VIXL_ASSERT(instr->GetShiftDP() != ROR);
VIXL_FALLTHROUGH();
}
- case 'L': { // HLo.
+ case 'L': { // NLo.
if (instr->GetImmDPShift() != 0) {
const char *shift_type[] = {"lsl", "lsr", "asr", "ror"};
AppendToOutput(", %s #%" PRId32,
@@ -5749,6 +10474,14 @@ int Disassembler::SubstituteShiftField(const Instruction *instr,
}
return 3;
}
+ case 'S': { // NSveS (SVE structured load/store indexing shift).
+ VIXL_ASSERT(strncmp(format, "NSveS", 5) == 0);
+ int msz = instr->ExtractBits(24, 23);
+ if (msz > 0) {
+ AppendToOutput(", lsl #%d", msz);
+ }
+ return 5;
+ }
default:
VIXL_UNIMPLEMENTED();
return 0;
@@ -5919,30 +10652,43 @@ int Disassembler::SubstituteLSRegOffsetField(const Instruction *instr,
int Disassembler::SubstitutePrefetchField(const Instruction *instr,
const char *format) {
- VIXL_ASSERT(format[0] == 'P');
+ VIXL_ASSERT(format[0] == 'p');
USE(format);
- static const char *hints[] = {"ld", "li", "st"};
+  bool is_sve = (strncmp(format, "prefSVEOp", strlen("prefSVEOp")) == 0);
+ int placeholder_length = is_sve ? 9 : 6;
static const char *stream_options[] = {"keep", "strm"};
- unsigned hint = instr->GetPrefetchHint();
+ auto get_hints = [](bool is_sve) -> std::vector<std::string> {
+ static const std::vector<std::string> sve_hints = {"ld", "st"};
+ static const std::vector<std::string> core_hints = {"ld", "li", "st"};
+ return (is_sve) ? sve_hints : core_hints;
+ };
+
+ std::vector<std::string> hints = get_hints(is_sve);
+ unsigned hint =
+ is_sve ? instr->GetSVEPrefetchHint() : instr->GetPrefetchHint();
unsigned target = instr->GetPrefetchTarget() + 1;
unsigned stream = instr->GetPrefetchStream();
- if ((hint >= ArrayLength(hints)) || (target > 3)) {
+ if ((hint >= hints.size()) || (target > 3)) {
// Unallocated prefetch operations.
- int prefetch_mode = instr->GetImmPrefetchOperation();
- AppendToOutput("#0b%c%c%c%c%c",
- (prefetch_mode & (1 << 4)) ? '1' : '0',
- (prefetch_mode & (1 << 3)) ? '1' : '0',
- (prefetch_mode & (1 << 2)) ? '1' : '0',
- (prefetch_mode & (1 << 1)) ? '1' : '0',
- (prefetch_mode & (1 << 0)) ? '1' : '0');
+ if (is_sve) {
+ std::bitset<4> prefetch_mode(instr->GetSVEImmPrefetchOperation());
+ AppendToOutput("#0b%s", prefetch_mode.to_string().c_str());
+ } else {
+ std::bitset<5> prefetch_mode(instr->GetImmPrefetchOperation());
+ AppendToOutput("#0b%s", prefetch_mode.to_string().c_str());
+ }
} else {
VIXL_ASSERT(stream < ArrayLength(stream_options));
- AppendToOutput("p%sl%d%s", hints[hint], target, stream_options[stream]);
+ AppendToOutput("p%sl%d%s",
+ hints[hint].c_str(),
+ target,
+ stream_options[stream]);
}
- return 6;
+ return placeholder_length;
}
int Disassembler::SubstituteBarrierField(const Instruction *instr,
@@ -5997,6 +10743,159 @@ int Disassembler::SubstituteCrField(const Instruction *instr,
return 2;
}
+int Disassembler::SubstituteIntField(const Instruction *instr,
+ const char *format) {
+ VIXL_ASSERT((format[0] == 'u') || (format[0] == 's'));
+
+ // A generic signed or unsigned int field uses a placeholder of the form
+ // 'sAABB and 'uAABB respectively where AA and BB are two digit bit positions
+ // between 00 and 31, and AA >= BB. The placeholder is substituted with the
+ // decimal integer represented by the bits in the instruction between
+ // positions AA and BB inclusive.
+ //
+ // In addition, split fields can be represented using 'sAABB:CCDD, where CCDD
+ // become the least-significant bits of the result, and bit AA is the sign bit
+ // (if 's is used).
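+  //
+  // For example (using placeholders that appear elsewhere in this file),
+  // 'u1916 prints the unsigned value of bits [19:16], 's1005 prints bits
+  // [10:5] as a signed value, and 'u2222:2019 concatenates bit 22 (as the
+  // high bit) with bits [20:19].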
+ int32_t bits = 0;
+ int width = 0;
+ const char *c = format;
+ do {
+ c++; // Skip the 'u', 's' or ':'.
+ VIXL_ASSERT(strspn(c, "0123456789") == 4);
+ int msb = ((c[0] - '0') * 10) + (c[1] - '0');
+ int lsb = ((c[2] - '0') * 10) + (c[3] - '0');
+ c += 4; // Skip the characters we just read.
+ int chunk_width = msb - lsb + 1;
+ VIXL_ASSERT((chunk_width > 0) && (chunk_width < 32));
+ bits = (bits << chunk_width) | (instr->ExtractBits(msb, lsb));
+ width += chunk_width;
+ } while (*c == ':');
+ VIXL_ASSERT(IsUintN(width, bits));
+
+ if (format[0] == 's') {
+ bits = ExtractSignedBitfield32(width - 1, 0, bits);
+ }
+
+ if (*c == '+') {
+ // A "+n" trailing the format specifier indicates the extracted value should
+ // be incremented by n. This is for cases where the encoding is zero-based,
+    // but the range of values is not, eg. values [1, 16] encoded as [0, 15].
+ char *new_c;
+ uint64_t value = strtoul(c + 1, &new_c, 10);
+ c = new_c;
+ VIXL_ASSERT(IsInt32(value));
+ bits += value;
+ } else if (*c == '*') {
+ // Similarly, a "*n" trailing the format specifier indicates the extracted
+ // value should be multiplied by n. This is for cases where the encoded
+ // immediate is scaled, for example by access size.
+ char *new_c;
+ uint64_t value = strtoul(c + 1, &new_c, 10);
+ c = new_c;
+ VIXL_ASSERT(IsInt32(value));
+ bits *= value;
+ }
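+  // For example, a hypothetical placeholder 'u2016+1 would print the value
+  // of bits [20:16] plus one, and 'u2016*8 would print it multiplied by
+  // eight.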
+
+ AppendToOutput("%d", bits);
+
+ return static_cast<int>(c - format);
+}
+
+int Disassembler::SubstituteSVESize(const Instruction *instr,
+ const char *format) {
+ USE(format);
+ VIXL_ASSERT(format[0] == 't');
+
+ static const char sizes[] = {'b', 'h', 's', 'd', 'q'};
+ // TODO: only the most common case for <size> is supported at the moment,
+ // and even then, the RESERVED values are handled as if they're not
+ // reserved.
+ unsigned size_in_bytes_log2 = instr->GetSVESize();
+ int placeholder_length = 1;
+ switch (format[1]) {
+ case 'l':
+ placeholder_length++;
+ if (format[2] == 's') {
+ // 'tls: Loads and stores
+ size_in_bytes_log2 = instr->ExtractBits(22, 21);
+ placeholder_length++;
+ if (format[3] == 's') {
+ // Sign extension load.
+ unsigned msize = instr->ExtractBits(24, 23);
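+        // For sign-extending loads (msize larger than the raw size field),
+        // the element size bits are stored inverted, so flip them;
+        // x ^ 0x3 == 3 - x for a two-bit field.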
+ if (msize > size_in_bytes_log2) size_in_bytes_log2 ^= 0x3;
+ placeholder_length++;
+ }
+ } else {
+ // 'tl: Logical operations
+ size_in_bytes_log2 = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
+ }
+ break;
+ case 'm': // 'tmsz
+ VIXL_ASSERT(strncmp(format, "tmsz", 4) == 0);
+ placeholder_length += 3;
+ size_in_bytes_log2 = instr->ExtractBits(24, 23);
+ break;
+ case 's':
+ if (format[2] == 'z') {
+ VIXL_ASSERT((format[3] == 'x') || (format[3] == 's') ||
+ (format[3] == 'p'));
+ if (format[3] == 'x') {
+ // 'tszx: Indexes.
+ std::pair<int, int> index_and_lane_size =
+ instr->GetSVEPermuteIndexAndLaneSizeLog2();
+ size_in_bytes_log2 = index_and_lane_size.second;
+ } else if (format[3] == 'p') {
+ // 'tszp: Predicated shifts.
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
+ size_in_bytes_log2 = shift_and_lane_size.second;
+ } else {
+ // 'tszs: Unpredicated shifts.
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+ size_in_bytes_log2 = shift_and_lane_size.second;
+ }
+      placeholder_length += 3;  // skip `sz[x|s|p]`
+ }
+ break;
+ case 'h':
+ // Half size of the lane size field.
+ size_in_bytes_log2 -= 1;
+ placeholder_length++;
+ break;
+ case 'q':
+ // Quarter size of the lane size field.
+ size_in_bytes_log2 -= 2;
+ placeholder_length++;
+ break;
+ default:
+ break;
+ }
+
+ VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(sizes));
+ AppendToOutput("%c", sizes[size_in_bytes_log2]);
+
+ return placeholder_length;
+}
+
+int Disassembler::SubstituteTernary(const Instruction *instr,
+ const char *format) {
+ VIXL_ASSERT((format[0] == '?') && (format[3] == ':'));
+
+  // A ternary substitution of the form "'?bb:TF" is replaced by a single
+ // character, either T or F, depending on the value of the bit at position
+ // bb in the instruction. For example, "'?31:xw" is substituted with "x" if
+ // bit 31 is true, and "w" otherwise.
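+  // In this file, for instance, "'Pgl/'?16:mz" (used for movprfx) prints the
+  // merging ('m') or zeroing ('z') qualifier according to bit 16.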
+ VIXL_ASSERT(strspn(&format[1], "0123456789") == 2);
+ char *c;
+ uint64_t value = strtoul(&format[1], &c, 10);
+ VIXL_ASSERT(value < (kInstructionSize * kBitsPerByte));
+ VIXL_ASSERT((*c == ':') && (strlen(c) >= 3)); // Minimum of ":TF"
+ c++;
+ AppendToOutput("%c", c[1 - instr->ExtractBit(static_cast<int>(value))]);
+ return 6;
+}
+
void Disassembler::ResetOutput() {
buffer_pos_ = 0;
buffer_[buffer_pos_] = 0;
diff --git a/src/aarch64/disasm-aarch64.h b/src/aarch64/disasm-aarch64.h
index c650bee9..b59840aa 100644
--- a/src/aarch64/disasm-aarch64.h
+++ b/src/aarch64/disasm-aarch64.h
@@ -27,6 +27,8 @@
#ifndef VIXL_AARCH64_DISASM_AARCH64_H
#define VIXL_AARCH64_DISASM_AARCH64_H
+#include <utility>
+
#include "../globals-vixl.h"
#include "../utils-vixl.h"
@@ -112,10 +114,13 @@ class Disassembler : public DecoderVisitor {
private:
void Format(const Instruction* instr,
const char* mnemonic,
- const char* format);
+ const char* format0,
+ const char* format1 = NULL);
void Substitute(const Instruction* instr, const char* string);
int SubstituteField(const Instruction* instr, const char* format);
int SubstituteRegisterField(const Instruction* instr, const char* format);
+ int SubstitutePredicateRegisterField(const Instruction* instr,
+ const char* format);
int SubstituteImmediateField(const Instruction* instr, const char* format);
int SubstituteLiteralField(const Instruction* instr, const char* format);
int SubstituteBitfieldImmediateField(const Instruction* instr,
@@ -130,6 +135,14 @@ class Disassembler : public DecoderVisitor {
int SubstituteBarrierField(const Instruction* instr, const char* format);
int SubstituteSysOpField(const Instruction* instr, const char* format);
int SubstituteCrField(const Instruction* instr, const char* format);
+ int SubstituteIntField(const Instruction* instr, const char* format);
+ int SubstituteSVESize(const Instruction* instr, const char* format);
+ int SubstituteTernary(const Instruction* instr, const char* format);
+
+ std::pair<unsigned, unsigned> GetRegNumForField(const Instruction* instr,
+ char reg_prefix,
+ const char* field);
+
bool RdIsZROrSP(const Instruction* instr) const {
return (instr->GetRd() == kZeroRegCode);
}
diff --git a/src/aarch64/instructions-aarch64.cc b/src/aarch64/instructions-aarch64.cc
index a99a0459..b3e28384 100644
--- a/src/aarch64/instructions-aarch64.cc
+++ b/src/aarch64/instructions-aarch64.cc
@@ -35,7 +35,8 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
unsigned width) {
VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) ||
(width == 32));
- VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+ VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
+ (reg_size == kSRegSize) || (reg_size == kDRegSize));
uint64_t result = value & ((UINT64_C(1) << width) - 1);
for (unsigned i = width; i < reg_size; i *= 2) {
result |= (result << i);
@@ -43,6 +44,503 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
return result;
}
+bool Instruction::CanTakeSVEMovprfx(const Instruction* movprfx) const {
+ bool movprfx_is_predicated = movprfx->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z;
+ bool movprfx_is_unpredicated =
+ movprfx->Mask(SVEConstructivePrefix_UnpredicatedMask) == MOVPRFX_z_z;
+ VIXL_ASSERT(movprfx_is_predicated != movprfx_is_unpredicated);
+
+ int movprfx_zd = movprfx->GetRd();
+ int movprfx_pg = movprfx_is_predicated ? movprfx->GetPgLow8() : -1;
+ VectorFormat movprfx_vform =
+ movprfx_is_predicated ? movprfx->GetSVEVectorFormat() : kFormatUndefined;
+
+ bool pg_matches_low8 = movprfx_pg == GetPgLow8();
+ bool vform_matches = movprfx_vform == GetSVEVectorFormat();
+ bool zd_matches = movprfx_zd == GetRd();
+ bool zd_matches_zm = movprfx_zd == GetRm();
+ bool zd_matches_zn = movprfx_zd == GetRn();
+
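+  // Broadly (see the individual cases below): the movprfx destination must
+  // match the instruction's destructive destination and must not alias any
+  // other source register; for the predicated movprfx form, the governing
+  // predicate and the element size must also match.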
+ switch (Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) {
+ case AND_z_zi:
+ case EOR_z_zi:
+ case ORR_z_zi:
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVEBitwiseLogical_PredicatedMask)) {
+ case AND_z_p_zz:
+ case BIC_z_p_zz:
+ case EOR_z_p_zz:
+ case ORR_z_p_zz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return zd_matches;
+ }
+ switch (Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
+ case ASRD_z_p_zi:
+ case ASR_z_p_zi:
+ case LSL_z_p_zi:
+ case LSR_z_p_zi:
+ if (movprfx_is_predicated) {
+ if (!pg_matches_low8) return false;
+ unsigned tsz = ExtractBits<0x00c00300>();
+ VectorFormat instr_vform =
+ SVEFormatFromLaneSizeInBytesLog2(HighestSetBitPosition(tsz));
+ if (movprfx_vform != instr_vform) return false;
+ }
+ return zd_matches;
+ }
+ switch (Mask(SVEBitwiseShiftByVector_PredicatedMask)) {
+ case ASRR_z_p_zz:
+ case ASR_z_p_zz:
+ case LSLR_z_p_zz:
+ case LSL_z_p_zz:
+ case LSRR_z_p_zz:
+ case LSR_z_p_zz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return zd_matches;
+ }
+ switch (Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) {
+ case ASR_z_p_zw:
+ case LSL_z_p_zw:
+ case LSR_z_p_zw:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return zd_matches;
+ }
+ switch (Mask(SVEConditionallyBroadcastElementToVectorMask)) {
+ case CLASTA_z_p_zz:
+ case CLASTB_z_p_zz:
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVECopyFPImm_PredicatedMask)) {
+ case FCPY_z_p_i:
+ if (movprfx_is_predicated) {
+ if (!vform_matches) return false;
+ if (movprfx_pg != GetRx<19, 16>()) return false;
+ }
+ return zd_matches;
+ }
+ switch (Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) {
+ case CPY_z_p_r:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ return zd_matches;
+ }
+ switch (Mask(SVECopyIntImm_PredicatedMask)) {
+ case CPY_z_p_i:
+ if (movprfx_is_predicated) {
+ if (!vform_matches) return false;
+ if (movprfx_pg != GetRx<19, 16>()) return false;
+ }
+ // Only the merging form can take movprfx.
+ if (ExtractBit(14) == 0) return false;
+ return zd_matches;
+ }
+ switch (Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) {
+ case CPY_z_p_v:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ return zd_matches && !zd_matches_zn;
+ }
+ switch (Mask(SVEFPArithmeticWithImm_PredicatedMask)) {
+ case FADD_z_p_zs:
+ case FMAXNM_z_p_zs:
+ case FMAX_z_p_zs:
+ case FMINNM_z_p_zs:
+ case FMIN_z_p_zs:
+ case FMUL_z_p_zs:
+ case FSUBR_z_p_zs:
+ case FSUB_z_p_zs:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ return zd_matches;
+ }
+ switch (Mask(SVEFPArithmetic_PredicatedMask)) {
+ case FABD_z_p_zz:
+ case FADD_z_p_zz:
+ case FDIVR_z_p_zz:
+ case FDIV_z_p_zz:
+ case FMAXNM_z_p_zz:
+ case FMAX_z_p_zz:
+ case FMINNM_z_p_zz:
+ case FMIN_z_p_zz:
+ case FMULX_z_p_zz:
+ case FMUL_z_p_zz:
+ case FSCALE_z_p_zz:
+ case FSUBR_z_p_zz:
+ case FSUB_z_p_zz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return zd_matches;
+ }
+ switch (Mask(SVEFPComplexAdditionMask)) {
+ case FCADD_z_p_zz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return zd_matches;
+ }
+ switch (Mask(SVEFPComplexMulAddIndexMask)) {
+ case FCMLA_z_zzzi_h:
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<18, 16>()) return false;
+ return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
+ case FCMLA_z_zzzi_s:
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<19, 16>()) return false;
+ return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
+ }
+ switch (Mask(SVEFPComplexMulAddMask)) {
+ case FCMLA_z_p_zzz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ return zd_matches && !zd_matches_zm && !zd_matches_zn;
+ }
+ switch (Mask(SVEFPConvertPrecisionMask)) {
+ case FCVT_z_p_z_d2h:
+ case FCVT_z_p_z_d2s:
+ case FCVT_z_p_z_h2d:
+ case FCVT_z_p_z_h2s:
+ case FCVT_z_p_z_s2d:
+ case FCVT_z_p_z_s2h:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ return zd_matches && !zd_matches_zn;
+ }
+ switch (Mask(SVEFPConvertToIntMask)) {
+ case FCVTZS_z_p_z_d2w:
+ case FCVTZS_z_p_z_d2x:
+ case FCVTZS_z_p_z_fp162h:
+ case FCVTZS_z_p_z_fp162w:
+ case FCVTZS_z_p_z_fp162x:
+ case FCVTZS_z_p_z_s2w:
+ case FCVTZS_z_p_z_s2x:
+ case FCVTZU_z_p_z_d2w:
+ case FCVTZU_z_p_z_d2x:
+ case FCVTZU_z_p_z_fp162h:
+ case FCVTZU_z_p_z_fp162w:
+ case FCVTZU_z_p_z_fp162x:
+ case FCVTZU_z_p_z_s2w:
+ case FCVTZU_z_p_z_s2x:
+ if (movprfx_is_predicated) {
+ if (!pg_matches_low8) return false;
+ // The movprfx element size must match the instruction's maximum encoded
+ // element size. We have to partially decode the opc and opc2 fields to
+ // find this.
+ unsigned opc = ExtractBits(23, 22);
+ unsigned opc2 = ExtractBits(18, 17);
+ VectorFormat instr_vform =
+ SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2));
+ if (movprfx_vform != instr_vform) return false;
+ }
+ return zd_matches && !zd_matches_zn;
+ }
+ switch (Mask(SVEFPMulAddIndexMask)) {
+ case FMLA_z_zzzi_h:
+ case FMLA_z_zzzi_h_i3h:
+ case FMLA_z_zzzi_s:
+ case FMLS_z_zzzi_h:
+ case FMLS_z_zzzi_h_i3h:
+ case FMLS_z_zzzi_s:
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<18, 16>()) return false;
+ return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
+ case FMLA_z_zzzi_d:
+ case FMLS_z_zzzi_d:
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<19, 16>()) return false;
+ return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
+ }
+ switch (Mask(SVEFPMulAddMask)) {
+ case FMAD_z_p_zzz:
+ case FMSB_z_p_zzz:
+ case FNMAD_z_p_zzz:
+ case FNMSB_z_p_zzz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<20, 16>()) return false;
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return zd_matches;
+ case FMLA_z_p_zzz:
+ case FMLS_z_p_zzz:
+ case FNMLA_z_p_zzz:
+ case FNMLS_z_p_zzz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ return zd_matches && !zd_matches_zm && !zd_matches_zn;
+ }
+ switch (Mask(SVEFPRoundToIntegralValueMask)) {
+ case FRINTA_z_p_z:
+ case FRINTI_z_p_z:
+ case FRINTM_z_p_z:
+ case FRINTN_z_p_z:
+ case FRINTP_z_p_z:
+ case FRINTX_z_p_z:
+ case FRINTZ_z_p_z:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ return zd_matches && !zd_matches_zn;
+ }
+ switch (Mask(SVEFPTrigMulAddCoefficientMask)) {
+ case FTMAD_z_zzi:
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVEFPUnaryOpMask)) {
+ case FRECPX_z_p_z:
+ case FSQRT_z_p_z:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ return zd_matches && !zd_matches_zn;
+ }
+ switch (Mask(SVEIncDecByPredicateCountMask)) {
+ case DECP_z_p_z:
+ case INCP_z_p_z:
+ case SQDECP_z_p_z:
+ case SQINCP_z_p_z:
+ case UQDECP_z_p_z:
+ case UQINCP_z_p_z:
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVEIncDecVectorByElementCountMask)) {
+ case DECD_z_zs:
+ case DECH_z_zs:
+ case DECW_z_zs:
+ case INCD_z_zs:
+ case INCH_z_zs:
+ case INCW_z_zs:
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVEInsertGeneralRegisterMask)) {
+ case INSR_z_r:
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVEInsertSIMDFPScalarRegisterMask)) {
+ case INSR_z_v:
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVEIntAddSubtractImm_UnpredicatedMask)) {
+ case ADD_z_zi:
+ case SQADD_z_zi:
+ case SQSUB_z_zi:
+ case SUBR_z_zi:
+ case SUB_z_zi:
+ case UQADD_z_zi:
+ case UQSUB_z_zi:
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVEIntAddSubtractVectors_PredicatedMask)) {
+ case ADD_z_p_zz:
+ case SUBR_z_p_zz:
+ case SUB_z_p_zz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return zd_matches;
+ }
+ switch (Mask(SVEIntConvertToFPMask)) {
+ case SCVTF_z_p_z_h2fp16:
+ case SCVTF_z_p_z_w2d:
+ case SCVTF_z_p_z_w2fp16:
+ case SCVTF_z_p_z_w2s:
+ case SCVTF_z_p_z_x2d:
+ case SCVTF_z_p_z_x2fp16:
+ case SCVTF_z_p_z_x2s:
+ case UCVTF_z_p_z_h2fp16:
+ case UCVTF_z_p_z_w2d:
+ case UCVTF_z_p_z_w2fp16:
+ case UCVTF_z_p_z_w2s:
+ case UCVTF_z_p_z_x2d:
+ case UCVTF_z_p_z_x2fp16:
+ case UCVTF_z_p_z_x2s:
+ if (movprfx_is_predicated) {
+ if (!pg_matches_low8) return false;
+ // The movprfx element size must match the instruction's maximum encoded
+ // element size. We have to partially decode the opc and opc2 fields to
+ // find this.
+ unsigned opc = ExtractBits(23, 22);
+ unsigned opc2 = ExtractBits(18, 17);
+ VectorFormat instr_vform =
+ SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2));
+ if (movprfx_vform != instr_vform) return false;
+ }
+ return zd_matches && !zd_matches_zn;
+ }
+ switch (Mask(SVEIntDivideVectors_PredicatedMask)) {
+ case SDIVR_z_p_zz:
+ case SDIV_z_p_zz:
+ case UDIVR_z_p_zz:
+ case UDIV_z_p_zz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return zd_matches;
+ }
+ switch (Mask(SVEIntMinMaxDifference_PredicatedMask)) {
+ case SABD_z_p_zz:
+ case SMAX_z_p_zz:
+ case SMIN_z_p_zz:
+ case UABD_z_p_zz:
+ case UMAX_z_p_zz:
+ case UMIN_z_p_zz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return zd_matches;
+ }
+ switch (Mask(SVEIntMinMaxImm_UnpredicatedMask)) {
+ case SMAX_z_zi:
+ case SMIN_z_zi:
+ case UMAX_z_zi:
+ case UMIN_z_zi:
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVEIntMulAddPredicatedMask)) {
+ case MAD_z_p_zzz:
+ case MSB_z_p_zzz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return zd_matches && !zd_matches_zm;
+ case MLA_z_p_zzz:
+ case MLS_z_p_zzz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ return zd_matches && !zd_matches_zm && !zd_matches_zn;
+ }
+ switch (Mask(SVEIntMulAddUnpredicatedMask)) {
+ case SDOT_z_zzz:
+ case UDOT_z_zzz:
+ return movprfx_is_unpredicated && zd_matches && !zd_matches_zm &&
+ !zd_matches_zn;
+ }
+ switch (Mask(SVEIntMulImm_UnpredicatedMask)) {
+ case MUL_z_zi:
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVEIntMulVectors_PredicatedMask)) {
+ case MUL_z_p_zz:
+ case SMULH_z_p_zz:
+ case UMULH_z_p_zz:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return zd_matches;
+ }
+ switch (Mask(SVEIntUnaryArithmeticPredicatedMask)) {
+ case ABS_z_p_z:
+ case CLS_z_p_z:
+ case CLZ_z_p_z:
+ case CNOT_z_p_z:
+ case CNT_z_p_z:
+ case FABS_z_p_z:
+ case FNEG_z_p_z:
+ case NEG_z_p_z:
+ case NOT_z_p_z:
+ case SXTB_z_p_z:
+ case SXTH_z_p_z:
+ case SXTW_z_p_z:
+ case UXTB_z_p_z:
+ case UXTH_z_p_z:
+ case UXTW_z_p_z:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ return zd_matches && !zd_matches_zn;
+ }
+ switch (Mask(SVEMulIndexMask)) {
+ case SDOT_z_zzzi_s:
+ case UDOT_z_zzzi_s:
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<18, 16>()) return false;
+ return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
+ case SDOT_z_zzzi_d:
+ case UDOT_z_zzzi_d:
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<19, 16>()) return false;
+ return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
+ }
+ switch (Mask(SVEPermuteVectorExtractMask)) {
+ case EXT_z_zi_des:
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVEReverseWithinElementsMask)) {
+ case RBIT_z_p_z:
+ case REVB_z_z:
+ case REVH_z_z:
+ case REVW_z_z:
+ if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
+ return false;
+ }
+ return zd_matches && !zd_matches_zn;
+ }
+ switch (Mask(SVESaturatingIncDecVectorByElementCountMask)) {
+ case SQDECD_z_zs:
+ case SQDECH_z_zs:
+ case SQDECW_z_zs:
+ case SQINCD_z_zs:
+ case SQINCH_z_zs:
+ case SQINCW_z_zs:
+ case UQDECD_z_zs:
+ case UQDECH_z_zs:
+ case UQDECW_z_zs:
+ case UQINCD_z_zs:
+ case UQINCH_z_zs:
+ case UQINCW_z_zs:
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ switch (Mask(SVEVectorSplice_DestructiveMask)) {
+ case SPLICE_z_p_zz_des:
+ // The movprfx's `zd` must not alias any other inputs.
+ if (movprfx_zd == GetRx<9, 5>()) return false;
+ return movprfx_is_unpredicated && zd_matches;
+ }
+ return false;
+} // NOLINT(readability/fn_size)
bool Instruction::IsLoad() const {
if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) {
@@ -103,6 +601,16 @@ bool Instruction::IsStore() const {
}
+std::pair<int, int> Instruction::GetSVEPermuteIndexAndLaneSizeLog2() const {
+ uint32_t imm_2 = ExtractBits<0x00C00000>();
+ uint32_t tsz_5 = ExtractBits<0x001F0000>();
+ uint32_t imm_7 = (imm_2 << 5) | tsz_5;
+ int lane_size_in_byte_log_2 = std::min(CountTrailingZeros(tsz_5), 5);
+ int index = ExtractUnsignedBitfield32(6, lane_size_in_byte_log_2 + 1, imm_7);
+ return std::make_pair(index, lane_size_in_byte_log_2);
+}
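+// A worked example of the decoding above (an illustrative sketch): a tsz of
+// 0b00010 has its lowest set bit at bit 1, so the lanes are H-sized; with
+// imm2 of 0b01, imm_7 is 0b0100010 and bits <6:2> give an index of 8.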
+
+
// Logical immediates can't encode zero, so a return value of zero is used to
// indicate a failure case. Specifically, where the constraints on imm_s are
// not met.
@@ -111,7 +619,108 @@ uint64_t Instruction::GetImmLogical() const {
int32_t n = GetBitN();
int32_t imm_s = GetImmSetBits();
int32_t imm_r = GetImmRotate();
+ return DecodeImmBitMask(n, imm_s, imm_r, reg_size);
+}
+
+// Logical immediates can't encode zero, so a return value of zero is used to
+// indicate a failure case. Specifically, where the constraints on imm_s are
+// not met.
+uint64_t Instruction::GetSVEImmLogical() const {
+ int n = GetSVEBitN();
+ int imm_s = GetSVEImmSetBits();
+ int imm_r = GetSVEImmRotate();
+ int lane_size_in_bytes_log2 = GetSVEBitwiseImmLaneSizeInBytesLog2();
+ switch (lane_size_in_bytes_log2) {
+ case kDRegSizeInBytesLog2:
+ case kSRegSizeInBytesLog2:
+ case kHRegSizeInBytesLog2:
+ case kBRegSizeInBytesLog2: {
+ int lane_size_in_bits = 1 << (lane_size_in_bytes_log2 + 3);
+ return DecodeImmBitMask(n, imm_s, imm_r, lane_size_in_bits);
+ }
+ default:
+ return 0;
+ }
+}
+std::pair<int, int> Instruction::GetSVEImmShiftAndLaneSizeLog2(
+ bool is_predicated) const {
+ Instr tsize =
+ is_predicated ? ExtractBits<0x00C00300>() : ExtractBits<0x00D80000>();
+ Instr imm_3 =
+ is_predicated ? ExtractBits<0x000000E0>() : ExtractBits<0x00070000>();
+ if (tsize == 0) {
+    // A zero `tsize` field is an undefined encoding, so return the
+    // convenience value kWMinInt to indicate a failure case.
+ return std::make_pair(kWMinInt, kWMinInt);
+ }
+
+ int lane_size_in_bytes_log_2 = 32 - CountLeadingZeros(tsize, 32) - 1;
+ int esize = (1 << lane_size_in_bytes_log_2) * kBitsPerByte;
+ int shift = (2 * esize) - ((tsize << 3) | imm_3);
+ return std::make_pair(shift, lane_size_in_bytes_log_2);
+}
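+// Example for the decoding above (a sketch): tsize = 0b0001 selects B-sized
+// lanes (esize = 8); with imm_3 = 0b101 the encoded value is 13, so the
+// returned shift is (2 * 8) - 13 = 3.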
+
+int Instruction::GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb) const {
+ Instr dtype_h = ExtractBits(dtype_h_lsb + 1, dtype_h_lsb);
+ if (is_signed) {
+ dtype_h = dtype_h ^ 0x3;
+ }
+ return dtype_h;
+}
+
+int Instruction::GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb) const {
+ Instr dtype_l = ExtractBits(dtype_l_lsb + 1, dtype_l_lsb);
+ if (is_signed) {
+ dtype_l = dtype_l ^ 0x3;
+ }
+ return dtype_l;
+}
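+// Note on the two dtype helpers above: signed accesses invert the extracted
+// two-bit field, so dtype_h = 0b01 is returned as 1 for an unsigned access
+// but as 0b01 ^ 0b11 = 2 for a signed one.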
+
+int Instruction::GetSVEBitwiseImmLaneSizeInBytesLog2() const {
+ int n = GetSVEBitN();
+ int imm_s = GetSVEImmSetBits();
+ unsigned type_bitset =
+ (n << SVEImmSetBits_width) | (~imm_s & GetUintMask(SVEImmSetBits_width));
+
+  // A lane size is constructed from the n and imm_s bits according to
+ // the following table:
+ //
+ // N imms size
+ // 0 0xxxxx 32
+ // 0 10xxxx 16
+ // 0 110xxx 8
+ // 0 1110xx 8
+ // 0 11110x 8
+ // 1 xxxxxx 64
+
+ if (type_bitset == 0) {
+    // Bail out early, since `HighestSetBitPosition` doesn't accept a
+    // zero-valued input.
+ return -1;
+ }
+
+ switch (HighestSetBitPosition(type_bitset)) {
+ case 6:
+ return kDRegSizeInBytesLog2;
+ case 5:
+ return kSRegSizeInBytesLog2;
+ case 4:
+ return kHRegSizeInBytesLog2;
+ case 3:
+ case 2:
+ case 1:
+ return kBRegSizeInBytesLog2;
+ default:
+ // RESERVED encoding.
+ return -1;
+ }
+}
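+// Example for the table above: N = 0 with imms = 0b101110 gives a
+// type_bitset of 0b010001, whose highest set bit is bit 4, so the lane size
+// is H (16 bits), matching the "0 10xxxx 16" row.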
+
+uint64_t Instruction::DecodeImmBitMask(int32_t n,
+ int32_t imm_s,
+ int32_t imm_r,
+ int32_t size) const {
// An integer is constructed from the n, imm_s and imm_r bits according to
// the following table:
//
@@ -146,7 +755,7 @@ uint64_t Instruction::GetImmLogical() const {
return 0;
}
uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1;
- return RepeatBitsAcrossReg(reg_size,
+ return RepeatBitsAcrossReg(size,
RotateRight(bits, imm_r & mask, width),
width);
}
@@ -397,8 +1006,6 @@ void Instruction::SetImmLLiteral(const Instruction* source) {
VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
- VIXL_ASSERT(vform == kFormat8H || vform == kFormat4S || vform == kFormat2D ||
- vform == kFormatH || vform == kFormatS || vform == kFormatD);
switch (vform) {
case kFormat8H:
return kFormat8B;
@@ -412,6 +1019,13 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
return kFormatH;
case kFormatD:
return kFormatS;
+ case kFormatVnH:
+ return kFormatVnB;
+ case kFormatVnS:
+ return kFormatVnH;
+ case kFormatVnD:
+ return kFormatVnS;
default:
VIXL_UNREACHABLE();
return kFormatUndefined;
@@ -480,6 +1094,12 @@ VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform) {
return kFormat2S;
case kFormat2D:
return kFormat4S;
+ case kFormatVnH:
+ return kFormatVnB;
+ case kFormatVnS:
+ return kFormatVnH;
+ case kFormatVnD:
+ return kFormatVnS;
default:
VIXL_UNREACHABLE();
return kFormatUndefined;
@@ -518,8 +1138,8 @@ VectorFormat VectorFormatHalfLanes(VectorFormat vform) {
}
-VectorFormat ScalarFormatFromLaneSize(int laneSize) {
- switch (laneSize) {
+VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits) {
+ switch (lane_size_in_bits) {
case 8:
return kFormatB;
case 16:
@@ -535,6 +1155,69 @@ VectorFormat ScalarFormatFromLaneSize(int laneSize) {
}
+bool IsSVEFormat(VectorFormat vform) {
+ switch (vform) {
+ case kFormatVnB:
+ case kFormatVnH:
+ case kFormatVnS:
+ case kFormatVnD:
+ case kFormatVnQ:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes) {
+ switch (lane_size_in_bytes) {
+ case 1:
+ return kFormatVnB;
+ case 2:
+ return kFormatVnH;
+ case 4:
+ return kFormatVnS;
+ case 8:
+ return kFormatVnD;
+ case 16:
+ return kFormatVnQ;
+ default:
+ VIXL_UNREACHABLE();
+ return kFormatUndefined;
+ }
+}
+
+
+VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits) {
+ switch (lane_size_in_bits) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ return SVEFormatFromLaneSizeInBytes(lane_size_in_bits / kBitsPerByte);
+ default:
+ VIXL_UNREACHABLE();
+ return kFormatUndefined;
+ }
+}
+
+
+VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log2) {
+ switch (lane_size_in_bytes_log2) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ return SVEFormatFromLaneSizeInBytes(1 << lane_size_in_bytes_log2);
+ default:
+ VIXL_UNREACHABLE();
+ return kFormatUndefined;
+ }
+}
+
+
VectorFormat ScalarFormatFromFormat(VectorFormat vform) {
return ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
}
@@ -542,6 +1225,7 @@ VectorFormat ScalarFormatFromFormat(VectorFormat vform) {
unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) {
VIXL_ASSERT(vform != kFormatUndefined);
+ VIXL_ASSERT(!IsSVEFormat(vform));
switch (vform) {
case kFormatB:
return kBRegSize;
@@ -551,14 +1235,19 @@ unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) {
case kFormat2H:
return kSRegSize;
case kFormatD:
- return kDRegSize;
case kFormat8B:
case kFormat4H:
case kFormat2S:
case kFormat1D:
return kDRegSize;
- default:
+ case kFormat16B:
+ case kFormat8H:
+ case kFormat4S:
+ case kFormat2D:
return kQRegSize;
+ default:
+ VIXL_UNREACHABLE();
+ return 0;
}
}
@@ -574,20 +1263,26 @@ unsigned LaneSizeInBitsFromFormat(VectorFormat vform) {
case kFormatB:
case kFormat8B:
case kFormat16B:
+ case kFormatVnB:
return 8;
case kFormatH:
case kFormat2H:
case kFormat4H:
case kFormat8H:
+ case kFormatVnH:
return 16;
case kFormatS:
case kFormat2S:
case kFormat4S:
+ case kFormatVnS:
return 32;
case kFormatD:
case kFormat1D:
case kFormat2D:
+ case kFormatVnD:
return 64;
+ case kFormatVnQ:
+ return 128;
default:
VIXL_UNREACHABLE();
return 0;
@@ -606,20 +1301,26 @@ int LaneSizeInBytesLog2FromFormat(VectorFormat vform) {
case kFormatB:
case kFormat8B:
case kFormat16B:
+ case kFormatVnB:
return 0;
case kFormatH:
case kFormat2H:
case kFormat4H:
case kFormat8H:
+ case kFormatVnH:
return 1;
case kFormatS:
case kFormat2S:
case kFormat4S:
+ case kFormatVnS:
return 2;
case kFormatD:
case kFormat1D:
case kFormat2D:
+ case kFormatVnD:
return 3;
+ case kFormatVnQ:
+ return 4;
default:
VIXL_UNREACHABLE();
return 0;
@@ -697,17 +1398,19 @@ bool IsVectorFormat(VectorFormat vform) {
int64_t MaxIntFromFormat(VectorFormat vform) {
- return INT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
+ int lane_size = LaneSizeInBitsFromFormat(vform);
+ return static_cast<int64_t>(GetUintMask(lane_size) >> 1);
}
int64_t MinIntFromFormat(VectorFormat vform) {
- return INT64_MIN >> (64 - LaneSizeInBitsFromFormat(vform));
+ return -MaxIntFromFormat(vform) - 1;
}
uint64_t MaxUintFromFormat(VectorFormat vform) {
- return UINT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
+ return GetUintMask(LaneSizeInBitsFromFormat(vform));
}
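+// For example, for a 16-bit lane format the three helpers above return
+// 0x7fff, -0x8000 and 0xffff respectively.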
+
} // namespace aarch64
} // namespace vixl
diff --git a/src/aarch64/instructions-aarch64.h b/src/aarch64/instructions-aarch64.h
index 6d4f96b4..5f56ae16 100644
--- a/src/aarch64/instructions-aarch64.h
+++ b/src/aarch64/instructions-aarch64.h
@@ -81,6 +81,7 @@ const uint64_t kXRegMask = UINT64_C(0xffffffffffffffff);
const uint64_t kHRegMask = UINT64_C(0xffff);
const uint64_t kSRegMask = UINT64_C(0xffffffff);
const uint64_t kDRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kHSignMask = UINT64_C(0x8000);
const uint64_t kSSignMask = UINT64_C(0x80000000);
const uint64_t kDSignMask = UINT64_C(0x8000000000000000);
const uint64_t kWSignMask = UINT64_C(0x80000000);
@@ -116,6 +117,30 @@ VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000));
const uint64_t kTTBRMask = UINT64_C(1) << 55;
+// We can't define a static kZRegSize because the size depends on the
+// implementation. However, it is sometimes useful to know the minimum and
+// maximum possible sizes.
+const unsigned kZRegMinSize = 128;
+const unsigned kZRegMinSizeLog2 = 7;
+const unsigned kZRegMinSizeInBytes = kZRegMinSize / 8;
+const unsigned kZRegMinSizeInBytesLog2 = kZRegMinSizeLog2 - 3;
+const unsigned kZRegMaxSize = 2048;
+const unsigned kZRegMaxSizeLog2 = 11;
+const unsigned kZRegMaxSizeInBytes = kZRegMaxSize / 8;
+const unsigned kZRegMaxSizeInBytesLog2 = kZRegMaxSizeLog2 - 3;
+
+// The P register size depends on the Z register size.
+const unsigned kZRegBitsPerPRegBit = kBitsPerByte;
+const unsigned kZRegBitsPerPRegBitLog2 = 3;
+const unsigned kPRegMinSize = kZRegMinSize / kZRegBitsPerPRegBit;
+const unsigned kPRegMinSizeLog2 = kZRegMinSizeLog2 - 3;
+const unsigned kPRegMinSizeInBytes = kPRegMinSize / 8;
+const unsigned kPRegMinSizeInBytesLog2 = kPRegMinSizeLog2 - 3;
+const unsigned kPRegMaxSize = kZRegMaxSize / kZRegBitsPerPRegBit;
+const unsigned kPRegMaxSizeLog2 = kZRegMaxSizeLog2 - 3;
+const unsigned kPRegMaxSizeInBytes = kPRegMaxSize / 8;
+const unsigned kPRegMaxSizeInBytesLog2 = kPRegMaxSizeLog2 - 3;
+
// Make these moved float constants backwards compatible
// with explicit vixl::aarch64:: namespace references.
using vixl::kDoubleMantissaBits;
@@ -151,6 +176,44 @@ enum AddrMode { Offset, PreIndex, PostIndex };
enum Reg31Mode { Reg31IsStackPointer, Reg31IsZeroRegister };
+enum VectorFormat {
+ kFormatUndefined = 0xffffffff,
+ kFormat8B = NEON_8B,
+ kFormat16B = NEON_16B,
+ kFormat4H = NEON_4H,
+ kFormat8H = NEON_8H,
+ kFormat2S = NEON_2S,
+ kFormat4S = NEON_4S,
+ kFormat1D = NEON_1D,
+ kFormat2D = NEON_2D,
+
+ // Scalar formats. We add the scalar bit to distinguish between scalar and
+ // vector enumerations; the bit is always set in the encoding of scalar ops
+ // and always clear for vector ops. Although kFormatD and kFormat1D appear
+ // to be the same, their meaning is subtly different. The first is a scalar
+ // operation, the second a vector operation that only affects one lane.
+ kFormatB = NEON_B | NEONScalar,
+ kFormatH = NEON_H | NEONScalar,
+ kFormatS = NEON_S | NEONScalar,
+ kFormatD = NEON_D | NEONScalar,
+
+  // An artificial value, used to distinguish from the NEON format category.
+ kFormatSVE = 0x0000fffd,
+ // An artificial value. Q lane size isn't encoded in the usual size field.
+ kFormatSVEQ = 0x000f0000,
+  // Vector element widths of SVE registers. The lane count is unknown, as
+  // the vector length is implementation-dependent.
+ kFormatVnB = SVE_B | kFormatSVE,
+ kFormatVnH = SVE_H | kFormatSVE,
+ kFormatVnS = SVE_S | kFormatSVE,
+ kFormatVnD = SVE_D | kFormatSVE,
+ kFormatVnQ = kFormatSVEQ | kFormatSVE,
+
+ // An artificial value, used by simulator trace tests and a few oddball
+ // instructions (such as FMLAL).
+ kFormat2H = 0xfffffffe
+};
+
// Instructions. ---------------------------------------------------------------
class Instruction {
@@ -229,6 +292,29 @@ class Instruction {
INSTRUCTION_FIELDS_LIST(DEFINE_GETTER)
#undef DEFINE_GETTER
+ template <int msb, int lsb>
+ int32_t GetRx() const {
+ // We don't have any register fields wider than five bits, so the result
+ // will always fit into an int32_t.
+ VIXL_ASSERT((msb - lsb + 1) <= 5);
+ return this->ExtractBits(msb, lsb);
+ }
+
+ VectorFormat GetSVEVectorFormat() const {
+ switch (Mask(SVESizeFieldMask)) {
+ case SVE_B:
+ return kFormatVnB;
+ case SVE_H:
+ return kFormatVnH;
+ case SVE_S:
+ return kFormatVnS;
+ case SVE_D:
+ return kFormatVnD;
+ }
+ VIXL_UNREACHABLE();
+ return kFormatUndefined;
+ }
+
// ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST),
// formed from ImmPCRelLo and ImmPCRelHi.
int GetImmPCRel() const {
@@ -254,6 +340,20 @@ class Instruction {
VIXL_DEPRECATED("GetImmLogical", uint64_t ImmLogical() const) {
return GetImmLogical();
}
+ uint64_t GetSVEImmLogical() const;
+ int GetSVEBitwiseImmLaneSizeInBytesLog2() const;
+ uint64_t DecodeImmBitMask(int32_t n,
+ int32_t imm_s,
+ int32_t imm_r,
+ int32_t size) const;
+
+ std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const;
+
+ std::pair<int, int> GetSVEImmShiftAndLaneSizeLog2(bool is_predicated) const;
+
+ int GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb = 23) const;
+
+ int GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb = 21) const;
unsigned GetImmNEONabcdefgh() const;
VIXL_DEPRECATED("GetImmNEONabcdefgh", unsigned ImmNEONabcdefgh() const) {
@@ -280,6 +380,16 @@ class Instruction {
return GetImmNEONFP64();
}
+ Float16 GetSVEImmFP16() const { return Imm8ToFloat16(ExtractBits(12, 5)); }
+
+ float GetSVEImmFP32() const { return Imm8ToFP32(ExtractBits(12, 5)); }
+
+ double GetSVEImmFP64() const { return Imm8ToFP64(ExtractBits(12, 5)); }
+
+ static Float16 Imm8ToFloat16(uint32_t imm8);
+ static float Imm8ToFP32(uint32_t imm8);
+ static double Imm8ToFP64(uint32_t imm8);
+
unsigned GetSizeLS() const {
return CalcLSDataSize(static_cast<LoadStoreOp>(Mask(LoadStoreMask)));
}
@@ -342,6 +452,9 @@ class Instruction {
return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed;
}
+ // True if `this` is valid immediately after the provided movprfx instruction.
+ bool CanTakeSVEMovprfx(Instruction const* movprfx) const;
+
bool IsLoad() const;
bool IsStore() const;
@@ -557,41 +670,12 @@ class Instruction {
private:
int GetImmBranch() const;
- static Float16 Imm8ToFloat16(uint32_t imm8);
- static float Imm8ToFP32(uint32_t imm8);
- static double Imm8ToFP64(uint32_t imm8);
-
void SetPCRelImmTarget(const Instruction* target);
void SetBranchImmTarget(const Instruction* target);
};
-// Functions for handling NEON vector format information.
-enum VectorFormat {
- kFormatUndefined = 0xffffffff,
- kFormat8B = NEON_8B,
- kFormat16B = NEON_16B,
- kFormat4H = NEON_4H,
- kFormat8H = NEON_8H,
- kFormat2S = NEON_2S,
- kFormat4S = NEON_4S,
- kFormat1D = NEON_1D,
- kFormat2D = NEON_2D,
-
- // Scalar formats. We add the scalar bit to distinguish between scalar and
- // vector enumerations; the bit is always set in the encoding of scalar ops
- // and always clear for vector ops. Although kFormatD and kFormat1D appear
- // to be the same, their meaning is subtly different. The first is a scalar
- // operation, the second a vector operation that only affects one lane.
- kFormatB = NEON_B | NEONScalar,
- kFormatH = NEON_H | NEONScalar,
- kFormatS = NEON_S | NEONScalar,
- kFormatD = NEON_D | NEONScalar,
-
- // An artificial value, used by simulator trace tests and a few oddball
- // instructions (such as FMLAL).
- kFormat2H = 0xfffffffe
-};
+// Functions for handling NEON and SVE vector format information.
const int kMaxLanesPerVector = 16;
@@ -599,12 +683,16 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform);
VectorFormat VectorFormatDoubleWidth(VectorFormat vform);
VectorFormat VectorFormatDoubleLanes(VectorFormat vform);
VectorFormat VectorFormatHalfLanes(VectorFormat vform);
-VectorFormat ScalarFormatFromLaneSize(int lanesize);
+VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits);
VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform);
VectorFormat VectorFormatFillQ(VectorFormat vform);
VectorFormat ScalarFormatFromFormat(VectorFormat vform);
+VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits);
+VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes);
+VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log_2);
unsigned RegisterSizeInBitsFromFormat(VectorFormat vform);
unsigned RegisterSizeInBytesFromFormat(VectorFormat vform);
+bool IsSVEFormat(VectorFormat vform);
// TODO: Make the return types of these functions consistent.
unsigned LaneSizeInBitsFromFormat(VectorFormat vform);
int LaneSizeInBytesFromFormat(VectorFormat vform);
diff --git a/src/aarch64/instrument-aarch64.cc b/src/aarch64/instrument-aarch64.cc
deleted file mode 100644
index 7cb6b20e..00000000
--- a/src/aarch64/instrument-aarch64.cc
+++ /dev/null
@@ -1,975 +0,0 @@
-// Copyright 2014, VIXL authors
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-// * Neither the name of ARM Limited nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "instrument-aarch64.h"
-
-namespace vixl {
-namespace aarch64 {
-
-Counter::Counter(const char* name, CounterType type)
- : count_(0), enabled_(false), type_(type) {
- VIXL_ASSERT(name != NULL);
- strncpy(name_, name, kCounterNameMaxLength - 1);
- // Make sure `name_` is always NULL-terminated, even if the source's length is
- // higher.
- name_[kCounterNameMaxLength - 1] = '\0';
-}
-
-
-void Counter::Enable() { enabled_ = true; }
-
-
-void Counter::Disable() { enabled_ = false; }
-
-
-bool Counter::IsEnabled() { return enabled_; }
-
-
-void Counter::Increment() {
- if (enabled_) {
- count_++;
- }
-}
-
-
-uint64_t Counter::GetCount() {
- uint64_t result = count_;
- if (type_ == Gauge) {
- // If the counter is a Gauge, reset the count after reading.
- count_ = 0;
- }
- return result;
-}
-
-
-const char* Counter::GetName() { return name_; }
-
-
-CounterType Counter::GetType() { return type_; }
-
-
-struct CounterDescriptor {
- const char* name;
- CounterType type;
-};
-
-
-static const CounterDescriptor kCounterList[] =
- {{"Instruction", Cumulative},
-
- {"Move Immediate", Gauge},
- {"Add/Sub DP", Gauge},
- {"Logical DP", Gauge},
- {"Other Int DP", Gauge},
- {"FP DP", Gauge},
-
- {"Conditional Select", Gauge},
- {"Conditional Compare", Gauge},
-
- {"Unconditional Branch", Gauge},
- {"Compare and Branch", Gauge},
- {"Test and Branch", Gauge},
- {"Conditional Branch", Gauge},
-
- {"Load Integer", Gauge},
- {"Load FP", Gauge},
- {"Load Pair", Gauge},
- {"Load Literal", Gauge},
-
- {"Store Integer", Gauge},
- {"Store FP", Gauge},
- {"Store Pair", Gauge},
-
- {"PC Addressing", Gauge},
- {"Other", Gauge},
- {"NEON", Gauge},
- {"Crypto", Gauge}};
-
-
-Instrument::Instrument(const char* datafile, uint64_t sample_period)
- : output_stream_(stdout), sample_period_(sample_period) {
- // Set up the output stream. If datafile is non-NULL, use that file. If it
- // can't be opened, or datafile is NULL, use stdout.
- if (datafile != NULL) {
- output_stream_ = fopen(datafile, "w");
- if (output_stream_ == NULL) {
- printf("Can't open output file %s. Using stdout.\n", datafile);
- output_stream_ = stdout;
- }
- }
-
- static const int num_counters =
- sizeof(kCounterList) / sizeof(CounterDescriptor);
-
- // Dump an instrumentation description comment at the top of the file.
- fprintf(output_stream_, "# counters=%d\n", num_counters);
- fprintf(output_stream_, "# sample_period=%" PRIu64 "\n", sample_period_);
-
- // Construct Counter objects from counter description array.
- for (int i = 0; i < num_counters; i++) {
- Counter* counter = new Counter(kCounterList[i].name, kCounterList[i].type);
- counters_.push_back(counter);
- }
-
- DumpCounterNames();
-}
-
-
-Instrument::~Instrument() {
- // Dump any remaining instruction data to the output file.
- DumpCounters();
-
- // Free all the counter objects.
- std::list<Counter*>::iterator it;
- for (it = counters_.begin(); it != counters_.end(); it++) {
- delete *it;
- }
-
- if (output_stream_ != stdout) {
- fclose(output_stream_);
- }
-}
-
-
-void Instrument::Update() {
- // Increment the instruction counter, and dump all counters if a sample period
- // has elapsed.
- static Counter* counter = GetCounter("Instruction");
- VIXL_ASSERT(counter->GetType() == Cumulative);
- counter->Increment();
-
- if ((sample_period_ != 0) && counter->IsEnabled() &&
- (counter->GetCount() % sample_period_) == 0) {
- DumpCounters();
- }
-}
-
-
-void Instrument::DumpCounters() {
- // Iterate through the counter objects, dumping their values to the output
- // stream.
- std::list<Counter*>::const_iterator it;
- for (it = counters_.begin(); it != counters_.end(); it++) {
- fprintf(output_stream_, "%" PRIu64 ",", (*it)->GetCount());
- }
- fprintf(output_stream_, "\n");
- fflush(output_stream_);
-}
-
-
-void Instrument::DumpCounterNames() {
- // Iterate through the counter objects, dumping the counter names to the
- // output stream.
- std::list<Counter*>::const_iterator it;
- for (it = counters_.begin(); it != counters_.end(); it++) {
- fprintf(output_stream_, "%s,", (*it)->GetName());
- }
- fprintf(output_stream_, "\n");
- fflush(output_stream_);
-}
-
-
-void Instrument::HandleInstrumentationEvent(unsigned event) {
- switch (event) {
- case InstrumentStateEnable:
- Enable();
- break;
- case InstrumentStateDisable:
- Disable();
- break;
- default:
- DumpEventMarker(event);
- }
-}
-
-
-void Instrument::DumpEventMarker(unsigned marker) {
-  // Dump an event marker to the output stream as a specially formatted comment
- // line.
- static Counter* counter = GetCounter("Instruction");
-
- fprintf(output_stream_,
- "# %c%c @ %" PRId64 "\n",
- marker & 0xff,
- (marker >> 8) & 0xff,
- counter->GetCount());
-}
-
-
-Counter* Instrument::GetCounter(const char* name) {
- // Get a Counter object by name from the counter list.
- std::list<Counter*>::const_iterator it;
- for (it = counters_.begin(); it != counters_.end(); it++) {
- if (strcmp((*it)->GetName(), name) == 0) {
- return *it;
- }
- }
-
- // A Counter by that name does not exist: print an error message to stderr
- // and the output file, and exit.
- static const char* error_message =
- "# Error: Unknown counter \"%s\". Exiting.\n";
- fprintf(stderr, error_message, name);
- fprintf(output_stream_, error_message, name);
- exit(1);
-}
-
-
-void Instrument::Enable() {
- std::list<Counter*>::iterator it;
- for (it = counters_.begin(); it != counters_.end(); it++) {
- (*it)->Enable();
- }
-}
-
-
-void Instrument::Disable() {
- std::list<Counter*>::iterator it;
- for (it = counters_.begin(); it != counters_.end(); it++) {
- (*it)->Disable();
- }
-}
-
-
-void Instrument::VisitPCRelAddressing(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("PC Addressing");
- counter->Increment();
-}
-
-
-void Instrument::VisitAddSubImmediate(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Add/Sub DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitLogicalImmediate(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Logical DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitMoveWideImmediate(const Instruction* instr) {
- Update();
- static Counter* counter = GetCounter("Move Immediate");
-
- if (instr->IsMovn() && (instr->GetRd() == kZeroRegCode)) {
- unsigned imm = instr->GetImmMoveWide();
- HandleInstrumentationEvent(imm);
- } else {
- counter->Increment();
- }
-}
-
-
-void Instrument::VisitBitfield(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other Int DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitExtract(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other Int DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitUnconditionalBranch(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Unconditional Branch");
- counter->Increment();
-}
-
-
-void Instrument::VisitUnconditionalBranchToRegister(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Unconditional Branch");
- counter->Increment();
-}
-
-
-void Instrument::VisitCompareBranch(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Compare and Branch");
- counter->Increment();
-}
-
-
-void Instrument::VisitTestBranch(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Test and Branch");
- counter->Increment();
-}
-
-
-void Instrument::VisitConditionalBranch(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Conditional Branch");
- counter->Increment();
-}
-
-
-void Instrument::VisitSystem(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other");
- counter->Increment();
-}
-
-
-void Instrument::VisitException(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other");
- counter->Increment();
-}
-
-
-void Instrument::InstrumentLoadStorePair(const Instruction* instr) {
- static Counter* load_pair_counter = GetCounter("Load Pair");
- static Counter* store_pair_counter = GetCounter("Store Pair");
-
- if (instr->Mask(LoadStorePairLBit) != 0) {
- load_pair_counter->Increment();
- } else {
- store_pair_counter->Increment();
- }
-}
-
-
-void Instrument::VisitLoadStorePairPostIndex(const Instruction* instr) {
- Update();
- InstrumentLoadStorePair(instr);
-}
-
-
-void Instrument::VisitLoadStorePairOffset(const Instruction* instr) {
- Update();
- InstrumentLoadStorePair(instr);
-}
-
-
-void Instrument::VisitLoadStorePairPreIndex(const Instruction* instr) {
- Update();
- InstrumentLoadStorePair(instr);
-}
-
-
-void Instrument::VisitLoadStorePairNonTemporal(const Instruction* instr) {
- Update();
- InstrumentLoadStorePair(instr);
-}
-
-
-void Instrument::VisitLoadStoreExclusive(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other");
- counter->Increment();
-}
-
-
-void Instrument::VisitAtomicMemory(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other");
- counter->Increment();
-}
-
-
-void Instrument::VisitLoadLiteral(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Load Literal");
- counter->Increment();
-}
-
-
-void Instrument::VisitLoadStorePAC(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Load Integer");
- counter->Increment();
-}
-
-
-void Instrument::InstrumentLoadStore(const Instruction* instr) {
- static Counter* load_int_counter = GetCounter("Load Integer");
- static Counter* store_int_counter = GetCounter("Store Integer");
- static Counter* load_fp_counter = GetCounter("Load FP");
- static Counter* store_fp_counter = GetCounter("Store FP");
-
- switch (instr->Mask(LoadStoreMask)) {
- case STRB_w:
- case STRH_w:
- case STR_w:
- VIXL_FALLTHROUGH();
- case STR_x:
- store_int_counter->Increment();
- break;
- case STR_s:
- VIXL_FALLTHROUGH();
- case STR_d:
- store_fp_counter->Increment();
- break;
- case LDRB_w:
- case LDRH_w:
- case LDR_w:
- case LDR_x:
- case LDRSB_x:
- case LDRSH_x:
- case LDRSW_x:
- case LDRSB_w:
- VIXL_FALLTHROUGH();
- case LDRSH_w:
- load_int_counter->Increment();
- break;
- case LDR_s:
- VIXL_FALLTHROUGH();
- case LDR_d:
- load_fp_counter->Increment();
- break;
- }
-}
-
-
-void Instrument::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
- Update();
- InstrumentLoadStore(instr);
-}
-
-
-void Instrument::VisitLoadStorePostIndex(const Instruction* instr) {
- USE(instr);
- Update();
- InstrumentLoadStore(instr);
-}
-
-
-void Instrument::VisitLoadStorePreIndex(const Instruction* instr) {
- Update();
- InstrumentLoadStore(instr);
-}
-
-
-void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) {
- Update();
- InstrumentLoadStore(instr);
-}
-
-void Instrument::VisitLoadStoreRCpcUnscaledOffset(const Instruction* instr) {
- Update();
- switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
- case STLURB:
- case STLURH:
- case STLUR_w:
- case STLUR_x: {
- static Counter* counter = GetCounter("Store Integer");
- counter->Increment();
- break;
- }
- case LDAPURB:
- case LDAPURSB_w:
- case LDAPURSB_x:
- case LDAPURH:
- case LDAPURSH_w:
- case LDAPURSH_x:
- case LDAPUR_w:
- case LDAPURSW:
- case LDAPUR_x: {
- static Counter* counter = GetCounter("Load Integer");
- counter->Increment();
- break;
- }
- }
-}
-
-
-void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
- Update();
- InstrumentLoadStore(instr);
-}
-
-
-void Instrument::VisitLogicalShifted(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Logical DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitAddSubShifted(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Add/Sub DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitAddSubExtended(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Add/Sub DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitAddSubWithCarry(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Add/Sub DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitRotateRightIntoFlags(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other");
- counter->Increment();
-}
-
-
-void Instrument::VisitEvaluateIntoFlags(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other");
- counter->Increment();
-}
-
-
-void Instrument::VisitConditionalCompareRegister(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Conditional Compare");
- counter->Increment();
-}
-
-
-void Instrument::VisitConditionalCompareImmediate(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Conditional Compare");
- counter->Increment();
-}
-
-
-void Instrument::VisitConditionalSelect(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Conditional Select");
- counter->Increment();
-}
-
-
-void Instrument::VisitDataProcessing1Source(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other Int DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitDataProcessing2Source(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other Int DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitDataProcessing3Source(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other Int DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitFPCompare(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("FP DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitFPConditionalCompare(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Conditional Compare");
- counter->Increment();
-}
-
-
-void Instrument::VisitFPConditionalSelect(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Conditional Select");
- counter->Increment();
-}
-
-
-void Instrument::VisitFPImmediate(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("FP DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitFPDataProcessing1Source(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("FP DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitFPDataProcessing2Source(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("FP DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitFPDataProcessing3Source(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("FP DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitFPIntegerConvert(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("FP DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitFPFixedPointConvert(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("FP DP");
- counter->Increment();
-}
-
-
-void Instrument::VisitCrypto2RegSHA(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Crypto");
- counter->Increment();
-}
-
-
-void Instrument::VisitCrypto3RegSHA(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Crypto");
- counter->Increment();
-}
-
-
-void Instrument::VisitCryptoAES(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Crypto");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEON2RegMisc(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEON2RegMiscFP16(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEON3Same(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEON3SameFP16(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEON3SameExtra(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEON3Different(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONAcrossLanes(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONByIndexedElement(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONCopy(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONExtract(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONLoadStoreMultiStructPostIndex(
- const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONLoadStoreSingleStructPostIndex(
- const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONModifiedImmediate(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONScalar2RegMisc(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONScalar3Diff(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONScalar3Same(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONScalar3SameFP16(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONScalar3SameExtra(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONScalarByIndexedElement(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONScalarCopy(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONScalarPairwise(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONScalarShiftImmediate(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONShiftImmediate(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONTable(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitNEONPerm(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("NEON");
- counter->Increment();
-}
-
-
-void Instrument::VisitReserved(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other");
- counter->Increment();
-}
-
-
-void Instrument::VisitUnallocated(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other");
- counter->Increment();
-}
-
-
-void Instrument::VisitUnimplemented(const Instruction* instr) {
- USE(instr);
- Update();
- static Counter* counter = GetCounter("Other");
- counter->Increment();
-}
-
-
-} // namespace aarch64
-} // namespace vixl
diff --git a/src/aarch64/instrument-aarch64.h b/src/aarch64/instrument-aarch64.h
deleted file mode 100644
index 4401b3ea..00000000
--- a/src/aarch64/instrument-aarch64.h
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2014, VIXL authors
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-// * Neither the name of ARM Limited nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef VIXL_AARCH64_INSTRUMENT_AARCH64_H_
-#define VIXL_AARCH64_INSTRUMENT_AARCH64_H_
-
-#include "../globals-vixl.h"
-#include "../utils-vixl.h"
-
-#include "constants-aarch64.h"
-#include "decoder-aarch64.h"
-#include "instrument-aarch64.h"
-
-namespace vixl {
-namespace aarch64 {
-
-const int kCounterNameMaxLength = 256;
-const uint64_t kDefaultInstrumentationSamplingPeriod = 1 << 22;
-
-
-enum InstrumentState { InstrumentStateDisable = 0, InstrumentStateEnable = 1 };
-
-
-enum CounterType {
- Gauge = 0, // Gauge counters reset themselves after reading.
- Cumulative = 1 // Cumulative counters keep their value after reading.
-};
-
-
-class Counter {
- public:
- explicit Counter(const char* name, CounterType type = Gauge);
-
- void Increment();
- void Enable();
- void Disable();
- bool IsEnabled();
- uint64_t GetCount();
- VIXL_DEPRECATED("GetCount", uint64_t count()) { return GetCount(); }
-
- const char* GetName();
- VIXL_DEPRECATED("GetName", const char* name()) { return GetName(); }
-
- CounterType GetType();
- VIXL_DEPRECATED("GetType", CounterType type()) { return GetType(); }
-
- private:
- char name_[kCounterNameMaxLength];
- uint64_t count_;
- bool enabled_;
- CounterType type_;
-};
-
-
-class Instrument : public DecoderVisitor {
- public:
- explicit Instrument(
- const char* datafile = NULL,
- uint64_t sample_period = kDefaultInstrumentationSamplingPeriod);
- ~Instrument();
-
- void Enable();
- void Disable();
-
-// Declare all Visitor functions.
-#define DECLARE(A) void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
- VISITOR_LIST(DECLARE)
-#undef DECLARE
-
- private:
- void Update();
- void DumpCounters();
- void DumpCounterNames();
- void DumpEventMarker(unsigned marker);
- void HandleInstrumentationEvent(unsigned event);
- Counter* GetCounter(const char* name);
-
- void InstrumentLoadStore(const Instruction* instr);
- void InstrumentLoadStorePair(const Instruction* instr);
-
- std::list<Counter*> counters_;
-
- FILE* output_stream_;
-
- // Counter information is dumped every sample_period_ instructions decoded.
- // For a sample_period_ = 0 a final counter value is only produced when the
- // Instrumentation class is destroyed.
- uint64_t sample_period_;
-};
-
-} // namespace aarch64
-} // namespace vixl
-
-#endif // VIXL_AARCH64_INSTRUMENT_AARCH64_H_
diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc
index e7ede2f9..cab02573 100644
--- a/src/aarch64/logic-aarch64.cc
+++ b/src/aarch64/logic-aarch64.cc
@@ -184,14 +184,28 @@ void Simulator::ld1(VectorFormat vform,
}
-void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
+void Simulator::ld1r(VectorFormat vform,
+ VectorFormat unpack_vform,
+ LogicVRegister dst,
+ uint64_t addr,
+ bool is_signed) {
+ unsigned unpack_size = LaneSizeInBitsFromFormat(unpack_vform);
dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- dst.ReadUintFromMem(vform, i, addr);
+ if (is_signed) {
+ dst.ReadIntFromMem(vform, unpack_size, i, addr);
+ } else {
+ dst.ReadUintFromMem(vform, unpack_size, i, addr);
+ }
}
}
+void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
+ ld1r(vform, vform, dst, addr);
+}
+
+
void Simulator::ld2(VectorFormat vform,
LogicVRegister dst1,
LogicVRegister dst2,
@@ -550,6 +564,7 @@ LogicVRegister Simulator::add(VectorFormat vform,
const LogicVRegister& src2) {
int lane_size = LaneSizeInBitsFromFormat(vform);
dst.ClearForWrite(vform);
+
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
// Test for unsigned saturation.
uint64_t ua = src1.UintLeftJustified(vform, i);
@@ -568,12 +583,39 @@ LogicVRegister Simulator::add(VectorFormat vform,
if ((pos_a == pos_b) && (pos_a != pos_r)) {
dst.SetSignedSat(i, pos_a);
}
-
dst.SetInt(vform, i, ur >> (64 - lane_size));
}
return dst;
}
+LogicVRegister Simulator::add_uint(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ uint64_t value) {
+ int lane_size = LaneSizeInBitsFromFormat(vform);
+ VIXL_ASSERT(IsUintN(lane_size, value));
+ dst.ClearForWrite(vform);
+ // Left-justify `value`.
+ uint64_t ub = value << (64 - lane_size);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Test for unsigned saturation.
+ uint64_t ua = src1.UintLeftJustified(vform, i);
+ uint64_t ur = ua + ub;
+ if (ur < ua) {
+ dst.SetUnsignedSat(i, true);
+ }
+
+ // Test for signed saturation.
+ // `value` is always positive, so we have an overflow if the (signed) result
+ // is smaller than the first operand.
+ if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
+ dst.SetSignedSat(i, true);
+ }
+
+ dst.SetInt(vform, i, ur >> (64 - lane_size));
+ }
+ return dst;
+}
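+// Example of the left-justified arithmetic in add_uint above: for 8-bit
+// lanes, adding 0x01 to a lane holding 0xff computes
+// 0xff00000000000000 + 0x0100000000000000, which wraps to zero; the wrap is
+// flagged as unsigned saturation and the lane result written back is 0x00.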
LogicVRegister Simulator::addp(VectorFormat vform,
LogicVRegister dst,
@@ -586,25 +628,68 @@ LogicVRegister Simulator::addp(VectorFormat vform,
return dst;
}
+LogicVRegister Simulator::sdiv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t val1 = src1.Int(vform, i);
+ int64_t val2 = src2.Int(vform, i);
+ int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
+ int64_t quotient = 0;
+ if ((val1 == min_int) && (val2 == -1)) {
+ quotient = min_int;
+ } else if (val2 != 0) {
+ quotient = val1 / val2;
+ }
+ dst.SetInt(vform, i, quotient);
+ }
+
+ return dst;
+}
+
+LogicVRegister Simulator::udiv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t val1 = src1.Uint(vform, i);
+ uint64_t val2 = src2.Uint(vform, i);
+ uint64_t quotient = 0;
+ if (val2 != 0) {
+ quotient = val1 / val2;
+ }
+ dst.SetUint(vform, i, quotient);
+ }
+
+ return dst;
+}
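+// Note for sdiv and udiv above: division by zero yields a zero quotient, and
+// the single signed overflow case (min_int / -1) yields min_int, per the
+// explicit checks in the loops.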
+
LogicVRegister Simulator::mla(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2) {
SimVRegister temp;
mul(vform, temp, src1, src2);
- add(vform, dst, dst, temp);
+ add(vform, dst, srca, temp);
return dst;
}
LogicVRegister Simulator::mls(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2) {
SimVRegister temp;
mul(vform, temp, src1, src2);
- sub(vform, dst, dst, temp);
+ sub(vform, dst, srca, temp);
return dst;
}
@@ -614,6 +699,7 @@ LogicVRegister Simulator::mul(VectorFormat vform,
const LogicVRegister& src1,
const LogicVRegister& src2) {
dst.ClearForWrite(vform);
+
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
}
@@ -632,6 +718,70 @@ LogicVRegister Simulator::mul(VectorFormat vform,
}
+LogicVRegister Simulator::smulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t dst_val;
+ int64_t val1 = src1.Int(vform, i);
+ int64_t val2 = src2.Int(vform, i);
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8:
+ dst_val = internal::MultiplyHigh<8>(val1, val2);
+ break;
+ case 16:
+ dst_val = internal::MultiplyHigh<16>(val1, val2);
+ break;
+ case 32:
+ dst_val = internal::MultiplyHigh<32>(val1, val2);
+ break;
+ case 64:
+ dst_val = internal::MultiplyHigh<64>(val1, val2);
+ break;
+ default:
+ dst_val = 0xbadbeef;
+ VIXL_UNREACHABLE();
+ break;
+ }
+ dst.SetInt(vform, i, dst_val);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::umulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t dst_val;
+ uint64_t val1 = src1.Uint(vform, i);
+ uint64_t val2 = src2.Uint(vform, i);
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8:
+ dst_val = internal::MultiplyHigh<8>(val1, val2);
+ break;
+ case 16:
+ dst_val = internal::MultiplyHigh<16>(val1, val2);
+ break;
+ case 32:
+ dst_val = internal::MultiplyHigh<32>(val1, val2);
+ break;
+ case 64:
+ dst_val = internal::MultiplyHigh<64>(val1, val2);
+ break;
+ default:
+ dst_val = 0xbadbeef;
+ VIXL_UNREACHABLE();
+ break;
+ }
+ dst.SetUint(vform, i, dst_val);
+ }
+ return dst;
+}
+
+
LogicVRegister Simulator::mla(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -639,7 +789,7 @@ LogicVRegister Simulator::mla(VectorFormat vform,
int index) {
SimVRegister temp;
VectorFormat indexform = VectorFormatFillQ(vform);
- return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
+ return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
}
@@ -650,7 +800,7 @@ LogicVRegister Simulator::mls(VectorFormat vform,
int index) {
SimVRegister temp;
VectorFormat indexform = VectorFormatFillQ(vform);
- return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
+ return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
}
@@ -898,8 +1048,14 @@ LogicVRegister Simulator::sdot(VectorFormat vform,
const LogicVRegister& src2,
int index) {
SimVRegister temp;
- VectorFormat indexform = VectorFormatFillQ(vform);
- return sdot(vform, dst, src1, dup_element(indexform, temp, src2, index));
+  // NEON indexed `dot` allows the index value to exceed the register size.
+  // Promote to the Q-sized vector format before the duplication.
+ dup_elements_to_segments(IsSVEFormat(vform) ? vform
+ : VectorFormatFillQ(vform),
+ temp,
+ src2,
+ index);
+ return sdot(vform, dst, src1, temp);
}
@@ -920,8 +1076,14 @@ LogicVRegister Simulator::udot(VectorFormat vform,
const LogicVRegister& src2,
int index) {
SimVRegister temp;
- VectorFormat indexform = VectorFormatFillQ(vform);
- return udot(vform, dst, src1, dup_element(indexform, temp, src2, index));
+  // NEON indexed `dot` allows the index value to exceed the register size.
+  // Promote to the Q-sized vector format before the duplication.
+ dup_elements_to_segments(IsSVEFormat(vform) ? vform
+ : VectorFormatFillQ(vform),
+ temp,
+ src2,
+ index);
+ return udot(vform, dst, src1, temp);
}
@@ -1025,6 +1187,34 @@ LogicVRegister Simulator::sub(VectorFormat vform,
return dst;
}
+LogicVRegister Simulator::sub_uint(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ uint64_t value) {
+ int lane_size = LaneSizeInBitsFromFormat(vform);
+ VIXL_ASSERT(IsUintN(lane_size, value));
+ dst.ClearForWrite(vform);
+ // Left-justify `value`.
+ uint64_t ub = value << (64 - lane_size);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Test for unsigned saturation.
+ uint64_t ua = src1.UintLeftJustified(vform, i);
+ uint64_t ur = ua - ub;
+ if (ub > ua) {
+ dst.SetUnsignedSat(i, false);
+ }
+
+ // Test for signed saturation.
+ // `value` is always positive, so we have an overflow if the (signed) result
+ // is greater than the first operand.
+ if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
+ dst.SetSignedSat(i, false);
+ }
+
+ dst.SetInt(vform, i, ur >> (64 - lane_size));
+ }
+ return dst;
+}
LogicVRegister Simulator::and_(VectorFormat vform,
LogicVRegister dst,
@@ -1091,12 +1281,12 @@ LogicVRegister Simulator::bic(VectorFormat vform,
const LogicVRegister& src,
uint64_t imm) {
uint64_t result[16];
- int laneCount = LaneCountFromFormat(vform);
- for (int i = 0; i < laneCount; ++i) {
+ int lane_count = LaneCountFromFormat(vform);
+ for (int i = 0; i < lane_count; ++i) {
result[i] = src.Uint(vform, i) & ~imm;
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
@@ -1298,10 +1488,13 @@ LogicVRegister Simulator::uaddlv(VectorFormat vform,
LogicVRegister Simulator::sminmaxv(VectorFormat vform,
LogicVRegister dst,
+ const LogicPRegister& pg,
const LogicVRegister& src,
bool max) {
int64_t dst_val = max ? INT64_MIN : INT64_MAX;
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
int64_t src_val = src.Int(vform, i);
if (max) {
dst_val = (src_val > dst_val) ? src_val : dst_val;
@@ -1318,15 +1511,35 @@ LogicVRegister Simulator::sminmaxv(VectorFormat vform,
LogicVRegister Simulator::smaxv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- sminmaxv(vform, dst, src, true);
+ sminmaxv(vform, dst, GetPTrue(), src, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::sminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ sminmaxv(vform, dst, GetPTrue(), src, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::smaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ sminmaxv(vform, dst, pg, src, true);
return dst;
}
LogicVRegister Simulator::sminv(VectorFormat vform,
LogicVRegister dst,
+ const LogicPRegister& pg,
const LogicVRegister& src) {
- sminmaxv(vform, dst, src, false);
+ VIXL_ASSERT(IsSVEFormat(vform));
+ sminmaxv(vform, dst, pg, src, false);
return dst;
}
@@ -1414,10 +1627,13 @@ LogicVRegister Simulator::uminp(VectorFormat vform,
LogicVRegister Simulator::uminmaxv(VectorFormat vform,
LogicVRegister dst,
+ const LogicPRegister& pg,
const LogicVRegister& src,
bool max) {
uint64_t dst_val = max ? 0 : UINT64_MAX;
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
uint64_t src_val = src.Uint(vform, i);
if (max) {
dst_val = (src_val > dst_val) ? src_val : dst_val;
@@ -1434,7 +1650,7 @@ LogicVRegister Simulator::uminmaxv(VectorFormat vform,
LogicVRegister Simulator::umaxv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- uminmaxv(vform, dst, src, true);
+ uminmaxv(vform, dst, GetPTrue(), src, true);
return dst;
}
@@ -1442,7 +1658,27 @@ LogicVRegister Simulator::umaxv(VectorFormat vform,
LogicVRegister Simulator::uminv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- uminmaxv(vform, dst, src, false);
+ uminmaxv(vform, dst, GetPTrue(), src, false);
+ return dst;
+}
+
+
+LogicVRegister Simulator::umaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ uminmaxv(vform, dst, pg, src, true);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ uminmaxv(vform, dst, pg, src, false);
return dst;
}
@@ -1521,14 +1757,104 @@ LogicVRegister Simulator::ushll2(VectorFormat vform,
return ushl(vform, dst, extendedreg, shiftreg);
}
+std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
+ const LogicPRegister& pg,
+ const LogicVRegister& src,
+ int offset_from_last_active) {
+ // Untested for any other values.
+ VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
+
+ int last_active = GetLastActive(vform, pg);
+ int lane_count = LaneCountFromFormat(vform);
+ int index =
+ ((last_active + offset_from_last_active) + lane_count) % lane_count;
+ return std::make_pair(last_active >= 0, src.Uint(vform, index));
+}
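+// clast behaviour sketch (illustrative): with eight lanes and the last active
+// lane at index 5, `offset_from_last_active == 0` returns lane 5 and an offset
+// of 1 returns lane 6; if lane 7 were the last active lane, an offset of 1
+// wraps to lane 0. When no lane is active, the boolean in the returned pair is
+// false.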
+
+LogicVRegister Simulator::compact(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ int j = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (pg.IsActive(vform, i)) {
+ dst.SetUint(vform, j++, src.Uint(vform, i));
+ }
+ }
+ for (; j < LaneCountFromFormat(vform); j++) {
+ dst.SetUint(vform, j, 0);
+ }
+ return dst;
+}
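+// compact worked example (illustrative): with eight lanes and lanes {1, 4, 6}
+// active, the result is {src[1], src[4], src[6], 0, 0, 0, 0, 0}; the active
+// elements are packed into the low lanes and the remaining lanes are zeroed.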
+
+LogicVRegister Simulator::splice(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ int lane_count = LaneCountFromFormat(vform);
+ int first_active = GetFirstActive(vform, pg);
+ int last_active = GetLastActive(vform, pg);
+ int dst_idx = 0;
+ uint64_t result[kZRegMaxSizeInBytes];
+
+ if (first_active >= 0) {
+ VIXL_ASSERT(last_active >= first_active);
+ VIXL_ASSERT(last_active < lane_count);
+ for (int i = first_active; i <= last_active; i++) {
+ result[dst_idx++] = src1.Uint(vform, i);
+ }
+ }
+
+ VIXL_ASSERT(dst_idx <= lane_count);
+ for (int i = dst_idx; i < lane_count; i++) {
+ result[i] = src2.Uint(vform, i - dst_idx);
+ }
+
+ for (int i = 0; i < lane_count; i++) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ return dst;
+}
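+// splice worked example (illustrative): with eight lanes and pg active on
+// lanes 2-4, the result is {src1[2], src1[3], src1[4], src2[0], src2[1],
+// src2[2], src2[3], src2[4]}: the active body of src1 followed by the low
+// lanes of src2.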
+
+LogicVRegister Simulator::sel(VectorFormat vform,
+ LogicVRegister dst,
+ const SimPRegister& pg,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ int p_reg_bits_per_lane =
+ LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
+ for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
+ uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
+ ? src1.Uint(vform, lane)
+ : src2.Uint(vform, lane);
+ dst.SetUint(vform, lane, lane_value);
+ }
+ return dst;
+}
+
+
+LogicPRegister Simulator::sel(LogicPRegister dst,
+ const LogicPRegister& pg,
+ const LogicPRegister& src1,
+ const LogicPRegister& src2) {
+ for (int i = 0; i < dst.GetChunkCount(); i++) {
+ LogicPRegister::ChunkType mask = pg.GetChunk(i);
+ LogicPRegister::ChunkType result =
+ (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
+ dst.SetChunk(i, result);
+ }
+ return dst;
+}
+
LogicVRegister Simulator::sli(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int shift) {
dst.ClearForWrite(vform);
- int laneCount = LaneCountFromFormat(vform);
- for (int i = 0; i < laneCount; i++) {
+ int lane_count = LaneCountFromFormat(vform);
+ for (int i = 0; i < lane_count; i++) {
uint64_t src_lane = src.Uint(vform, i);
uint64_t dst_lane = dst.Uint(vform, i);
uint64_t shifted = src_lane << shift;
@@ -1577,10 +1903,10 @@ LogicVRegister Simulator::sri(VectorFormat vform,
const LogicVRegister& src,
int shift) {
dst.ClearForWrite(vform);
- int laneCount = LaneCountFromFormat(vform);
+ int lane_count = LaneCountFromFormat(vform);
VIXL_ASSERT((shift > 0) &&
(shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
- for (int i = 0; i < laneCount; i++) {
+ for (int i = 0; i < lane_count; i++) {
uint64_t src_lane = src.Uint(vform, i);
uint64_t dst_lane = dst.Uint(vform, i);
uint64_t shifted;
@@ -1663,15 +1989,18 @@ LogicVRegister Simulator::ursra(VectorFormat vform,
LogicVRegister Simulator::cls(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- uint64_t result[16];
- int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
- int laneCount = LaneCountFromFormat(vform);
- for (int i = 0; i < laneCount; i++) {
- result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
+ int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
+ int lane_count = LaneCountFromFormat(vform);
+
+ // Ensure that we can store one result per lane.
+ int result[kZRegMaxSizeInBytes];
+
+ for (int i = 0; i < lane_count; i++) {
+ result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
@@ -1681,38 +2010,51 @@ LogicVRegister Simulator::cls(VectorFormat vform,
LogicVRegister Simulator::clz(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- uint64_t result[16];
- int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
- int laneCount = LaneCountFromFormat(vform);
- for (int i = 0; i < laneCount; i++) {
- result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
+ int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
+ int lane_count = LaneCountFromFormat(vform);
+
+ // Ensure that we can store one result per lane.
+ int result[kZRegMaxSizeInBytes];
+
+ for (int i = 0; i < lane_count; i++) {
+ result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
}
+LogicVRegister Simulator::cnot(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
+ dst.SetUint(vform, i, value);
+ }
+ return dst;
+}
+
+
LogicVRegister Simulator::cnt(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- uint64_t result[16];
- int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
- int laneCount = LaneCountFromFormat(vform);
- for (int i = 0; i < laneCount; i++) {
- uint64_t value = src.Uint(vform, i);
- result[i] = 0;
- for (int j = 0; j < laneSizeInBits; j++) {
- result[i] += (value & 1);
- value >>= 1;
- }
+ int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
+ int lane_count = LaneCountFromFormat(vform);
+
+ // Ensure that we can store one result per lane.
+ int result[kZRegMaxSizeInBytes];
+
+ for (int i = 0; i < lane_count; i++) {
+ result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
@@ -1896,11 +2238,108 @@ LogicVRegister Simulator::abs(VectorFormat vform,
}
+LogicVRegister Simulator::andv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ result &= src.Uint(vform, i);
+ }
+ VectorFormat vform_dst =
+ ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
+ dst.ClearForWrite(vform_dst);
+ dst.SetUint(vform_dst, 0, result);
+ return dst;
+}
+
+
+LogicVRegister Simulator::eorv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ uint64_t result = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ result ^= src.Uint(vform, i);
+ }
+ VectorFormat vform_dst =
+ ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
+ dst.ClearForWrite(vform_dst);
+ dst.SetUint(vform_dst, 0, result);
+ return dst;
+}
+
+
+LogicVRegister Simulator::orv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ uint64_t result = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ result |= src.Uint(vform, i);
+ }
+ VectorFormat vform_dst =
+ ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
+ dst.ClearForWrite(vform_dst);
+ dst.SetUint(vform_dst, 0, result);
+ return dst;
+}
+
+
+LogicVRegister Simulator::saddv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
+ int64_t result = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ // The destination register always has D-lane sizes and the source register
+ // always has S-lanes or smaller, so signed integer overflow -- undefined
+ // behaviour -- can't occur.
+ result += src.Int(vform, i);
+ }
+
+ dst.ClearForWrite(kFormatD);
+ dst.SetInt(kFormatD, 0, result);
+ return dst;
+}
+
+
+LogicVRegister Simulator::uaddv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ uint64_t result = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ result += src.Uint(vform, i);
+ }
+
+ dst.ClearForWrite(kFormatD);
+ dst.SetUint(kFormatD, 0, result);
+ return dst;
+}
+
+
LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
LogicVRegister dst,
- bool dstIsSigned,
+ bool dst_is_signed,
const LogicVRegister& src,
- bool srcIsSigned) {
+ bool src_is_signed) {
bool upperhalf = false;
VectorFormat srcform = kFormatUndefined;
int64_t ssrc[8];
@@ -1969,7 +2408,7 @@ LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
}
// Test for unsigned saturation
- if (srcIsSigned) {
+ if (src_is_signed) {
if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
dst.SetUnsignedSat(offset + i, true);
} else if (ssrc[i] < 0) {
@@ -1982,13 +2421,13 @@ LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
}
int64_t result;
- if (srcIsSigned) {
+ if (src_is_signed) {
result = ssrc[i] & MaxUintFromFormat(dstform);
} else {
result = usrc[i] & MaxUintFromFormat(dstform);
}
- if (dstIsSigned) {
+ if (dst_is_signed) {
dst.SetInt(dstform, offset + i, result);
} else {
dst.SetUint(dstform, offset + i, result);
@@ -2030,17 +2469,17 @@ LogicVRegister Simulator::absdiff(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
- bool issigned) {
+ bool is_signed) {
dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- if (issigned) {
- int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
- sr = sr > 0 ? sr : -sr;
- dst.SetInt(vform, i, sr);
+ bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
+ : (src1.Uint(vform, i) > src2.Uint(vform, i));
+ // Always calculate the answer using unsigned arithmetic, to avoid
+ // undefined behaviour from signed overflow.
+ if (src1_gt_src2) {
+ dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
} else {
- int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
- sr = sr > 0 ? sr : -sr;
- dst.SetUint(vform, i, sr);
+ dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
}
}
return dst;
@@ -2085,15 +2524,15 @@ LogicVRegister Simulator::not_(VectorFormat vform,
LogicVRegister Simulator::rbit(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- uint64_t result[16];
- int laneCount = LaneCountFromFormat(vform);
- int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
+ uint64_t result[kZRegMaxSizeInBytes];
+ int lane_count = LaneCountFromFormat(vform);
+ int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
uint64_t reversed_value;
uint64_t value;
- for (int i = 0; i < laneCount; i++) {
+ for (int i = 0; i < lane_count; i++) {
value = src.Uint(vform, i);
reversed_value = 0;
- for (int j = 0; j < laneSizeInBits; j++) {
+ for (int j = 0; j < lane_size_in_bits; j++) {
reversed_value = (reversed_value << 1) | (value & 1);
value >>= 1;
}
@@ -2101,7 +2540,7 @@ LogicVRegister Simulator::rbit(VectorFormat vform,
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
@@ -2110,19 +2549,33 @@ LogicVRegister Simulator::rbit(VectorFormat vform,
LogicVRegister Simulator::rev(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src,
- int revSize) {
- uint64_t result[16];
- int laneCount = LaneCountFromFormat(vform);
- int laneSize = LaneSizeInBytesFromFormat(vform);
- int lanesPerLoop = revSize / laneSize;
- for (int i = 0; i < laneCount; i += lanesPerLoop) {
- for (int j = 0; j < lanesPerLoop; j++) {
- result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
+ const LogicVRegister& src) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ int lane_count = LaneCountFromFormat(vform);
+ for (int i = 0; i < lane_count / 2; i++) {
+ uint64_t t = src.Uint(vform, i);
+ dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
+ dst.SetUint(vform, lane_count - i - 1, t);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::rev_byte(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int rev_size) {
+ uint64_t result[kZRegMaxSizeInBytes];
+ int lane_count = LaneCountFromFormat(vform);
+ int lane_size = LaneSizeInBytesFromFormat(vform);
+ int lanes_per_loop = rev_size / lane_size;
+ for (int i = 0; i < lane_count; i += lanes_per_loop) {
+ for (int j = 0; j < lanes_per_loop; j++) {
+ result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
}
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
@@ -2132,21 +2585,21 @@ LogicVRegister Simulator::rev(VectorFormat vform,
LogicVRegister Simulator::rev16(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- return rev(vform, dst, src, 2);
+ return rev_byte(vform, dst, src, 2);
}
LogicVRegister Simulator::rev32(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- return rev(vform, dst, src, 4);
+ return rev_byte(vform, dst, src, 4);
}
LogicVRegister Simulator::rev64(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- return rev(vform, dst, src, 8);
+ return rev_byte(vform, dst, src, 8);
}
@@ -2215,22 +2668,60 @@ LogicVRegister Simulator::ext(VectorFormat vform,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index) {
- uint8_t result[16];
- int laneCount = LaneCountFromFormat(vform);
- for (int i = 0; i < laneCount - index; ++i) {
+ uint8_t result[kZRegMaxSizeInBytes];
+ int lane_count = LaneCountFromFormat(vform);
+ for (int i = 0; i < lane_count - index; ++i) {
result[i] = src1.Uint(vform, i + index);
}
for (int i = 0; i < index; ++i) {
- result[laneCount - index + i] = src2.Uint(vform, i);
+ result[lane_count - index + i] = src2.Uint(vform, i);
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
}
template <typename T>
+LogicVRegister Simulator::fadda(VectorFormat vform,
+ LogicVRegister acc,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ T result = acc.Float<T>(0);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ result = FPAdd(result, src.Float<T>(i));
+ }
+ VectorFormat vform_dst =
+ ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
+ acc.ClearForWrite(vform_dst);
+ acc.SetFloat(0, result);
+ return acc;
+}
+
+LogicVRegister Simulator::fadda(VectorFormat vform,
+ LogicVRegister acc,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case kHRegSize:
+ fadda<SimFloat16>(vform, acc, pg, src);
+ break;
+ case kSRegSize:
+ fadda<float>(vform, acc, pg, src);
+ break;
+ case kDRegSize:
+ fadda<double>(vform, acc, pg, src);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ return acc;
+}
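+// Note: fadda is a strictly-ordered accumulation; the initial value in lane 0
+// of `acc` is combined with each active element in increasing lane order.
+// Since FP addition is not associative, this can round differently from the
+// pairwise faddv reduction defined further down, so the two are modelled
+// separately.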
+
+template <typename T>
LogicVRegister Simulator::fcadd(VectorFormat vform,
LogicVRegister dst, // d
const LogicVRegister& src1, // n
@@ -2273,7 +2764,7 @@ LogicVRegister Simulator::fcadd(VectorFormat vform,
const LogicVRegister& src2, // m
int rot) {
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- VIXL_UNIMPLEMENTED();
+ fcadd<SimFloat16>(vform, dst, src1, src2, rot);
} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
fcadd<float>(vform, dst, src1, src2, rot);
} else {
@@ -2283,12 +2774,12 @@ LogicVRegister Simulator::fcadd(VectorFormat vform,
return dst;
}
-
template <typename T>
LogicVRegister Simulator::fcmla(VectorFormat vform,
- LogicVRegister dst, // d
- const LogicVRegister& src1, // n
- const LogicVRegister& src2, // m
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ const LogicVRegister& acc,
int index,
int rot) {
int elements = LaneCountFromFormat(vform);
@@ -2301,83 +2792,33 @@ LogicVRegister Simulator::fcmla(VectorFormat vform,
// 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
for (int e = 0; e <= (elements / 2) - 1; e++) {
- switch (rot) {
- case 0:
- element1 = src2.Float<T>(index * 2);
- element2 = src1.Float<T>(e * 2);
- element3 = src2.Float<T>(index * 2 + 1);
- element4 = src1.Float<T>(e * 2);
- break;
- case 90:
- element1 = FPNeg(src2.Float<T>(index * 2 + 1));
- element2 = src1.Float<T>(e * 2 + 1);
- element3 = src2.Float<T>(index * 2);
- element4 = src1.Float<T>(e * 2 + 1);
- break;
- case 180:
- element1 = FPNeg(src2.Float<T>(index * 2));
- element2 = src1.Float<T>(e * 2);
- element3 = FPNeg(src2.Float<T>(index * 2 + 1));
- element4 = src1.Float<T>(e * 2);
- break;
- case 270:
- element1 = src2.Float<T>(index * 2 + 1);
- element2 = src1.Float<T>(e * 2 + 1);
- element3 = FPNeg(src2.Float<T>(index * 2));
- element4 = src1.Float<T>(e * 2 + 1);
- break;
- default:
- VIXL_UNREACHABLE();
- return dst; // prevents "element(n) may be unintialized" errors
- }
- dst.ClearForWrite(vform);
- dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1));
- dst.SetFloat<T>(e * 2 + 1,
- FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3));
- }
- return dst;
-}
-
-
-template <typename T>
-LogicVRegister Simulator::fcmla(VectorFormat vform,
- LogicVRegister dst, // d
- const LogicVRegister& src1, // n
- const LogicVRegister& src2, // m
- int rot) {
- int elements = LaneCountFromFormat(vform);
-
- T element1, element2, element3, element4;
- rot *= 90;
-
- // Loop example:
- // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
- // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
+ // Index == -1 indicates a vector/vector rather than vector/indexed-element
+ // operation.
+ int f = (index < 0) ? e : index;
- for (int e = 0; e <= (elements / 2) - 1; e++) {
switch (rot) {
case 0:
- element1 = src2.Float<T>(e * 2);
+ element1 = src2.Float<T>(f * 2);
element2 = src1.Float<T>(e * 2);
- element3 = src2.Float<T>(e * 2 + 1);
+ element3 = src2.Float<T>(f * 2 + 1);
element4 = src1.Float<T>(e * 2);
break;
case 90:
- element1 = FPNeg(src2.Float<T>(e * 2 + 1));
+ element1 = FPNeg(src2.Float<T>(f * 2 + 1));
element2 = src1.Float<T>(e * 2 + 1);
- element3 = src2.Float<T>(e * 2);
+ element3 = src2.Float<T>(f * 2);
element4 = src1.Float<T>(e * 2 + 1);
break;
case 180:
- element1 = FPNeg(src2.Float<T>(e * 2));
+ element1 = FPNeg(src2.Float<T>(f * 2));
element2 = src1.Float<T>(e * 2);
- element3 = FPNeg(src2.Float<T>(e * 2 + 1));
+ element3 = FPNeg(src2.Float<T>(f * 2 + 1));
element4 = src1.Float<T>(e * 2);
break;
case 270:
- element1 = src2.Float<T>(e * 2 + 1);
+ element1 = src2.Float<T>(f * 2 + 1);
element2 = src1.Float<T>(e * 2 + 1);
- element3 = FPNeg(src2.Float<T>(e * 2));
+ element3 = FPNeg(src2.Float<T>(f * 2));
element4 = src1.Float<T>(e * 2 + 1);
break;
default:
@@ -2385,25 +2826,28 @@ LogicVRegister Simulator::fcmla(VectorFormat vform,
return dst; // prevents "element(n) may be uninitialized" errors
}
dst.ClearForWrite(vform);
- dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1));
- dst.SetFloat<T>(e * 2 + 1,
- FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3));
+ dst.SetFloat<T>(vform,
+ e * 2,
+ FPMulAdd(acc.Float<T>(e * 2), element2, element1));
+ dst.SetFloat<T>(vform,
+ e * 2 + 1,
+ FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
}
return dst;
}
-
LogicVRegister Simulator::fcmla(VectorFormat vform,
- LogicVRegister dst, // d
- const LogicVRegister& src1, // n
- const LogicVRegister& src2, // m
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ const LogicVRegister& acc,
int rot) {
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- VIXL_UNIMPLEMENTED();
+ fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
- fcmla<float>(vform, dst, src1, src2, rot);
+ fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
} else {
- fcmla<double>(vform, dst, src1, src2, rot);
+ fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
}
return dst;
}
@@ -2418,9 +2862,9 @@ LogicVRegister Simulator::fcmla(VectorFormat vform,
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
VIXL_UNIMPLEMENTED();
} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
- fcmla<float>(vform, dst, src1, src2, index, rot);
+ fcmla<float>(vform, dst, src1, src2, dst, index, rot);
} else {
- fcmla<double>(vform, dst, src1, src2, index, rot);
+ fcmla<double>(vform, dst, src1, src2, dst, index, rot);
}
return dst;
}
@@ -2430,23 +2874,59 @@ LogicVRegister Simulator::dup_element(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int src_index) {
- int laneCount = LaneCountFromFormat(vform);
- uint64_t value = src.Uint(vform, src_index);
- dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
- dst.SetUint(vform, i, value);
+ if (vform == kFormatVnQ) {
+ // When duplicating a 128-bit value, split it into two 64-bit parts, and
+ // then copy both parts to their slots in the destination register.
+ uint64_t low = src.Uint(kFormatVnD, src_index * 2);
+ uint64_t high = src.Uint(kFormatVnD, (src_index * 2) + 1);
+ dst.ClearForWrite(vform);
+ for (int d_lane = 0; d_lane < LaneCountFromFormat(kFormatVnD);
+ d_lane += 2) {
+ dst.SetUint(kFormatVnD, d_lane, low);
+ dst.SetUint(kFormatVnD, d_lane + 1, high);
+ }
+ } else {
+ int lane_count = LaneCountFromFormat(vform);
+ uint64_t value = src.Uint(vform, src_index);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < lane_count; ++i) {
+ dst.SetUint(vform, i, value);
+ }
}
return dst;
}
+LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int src_index) {
+ // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
+ // whereas in NEON the size of a segment is equal to the size of the
+ // register itself.
+ int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
+ VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
+ int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
+
+ VIXL_ASSERT(src_index >= 0);
+ VIXL_ASSERT(src_index < lanes_per_segment);
+
+ dst.ClearForWrite(vform);
+ for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
+ uint64_t value = src.Uint(vform, j + src_index);
+ for (int i = 0; i < lanes_per_segment; i++) {
+ dst.SetUint(vform, j + i, value);
+ }
+ }
+ return dst;
+}
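+// dup_elements_to_segments worked example (illustrative): for a 256-bit vector
+// of S lanes, each 128-bit segment holds four lanes, so `src_index == 1`
+// copies lane 1 into lanes 0-3 and lane 5 into lanes 4-7.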
LogicVRegister Simulator::dup_immediate(VectorFormat vform,
LogicVRegister dst,
uint64_t imm) {
- int laneCount = LaneCountFromFormat(vform);
+ int lane_count = LaneCountFromFormat(vform);
uint64_t value = imm & MaxUintFromFormat(vform);
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, value);
}
return dst;
@@ -2473,12 +2953,93 @@ LogicVRegister Simulator::ins_immediate(VectorFormat vform,
}
+LogicVRegister Simulator::index(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t start,
+ uint64_t step) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ uint64_t value = start;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, value);
+ value += step;
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::insr(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
+ dst.SetUint(vform, i, dst.Uint(vform, i - 1));
+ }
+ dst.SetUint(vform, 0, imm);
+ return dst;
+}
+
+
+LogicVRegister Simulator::mov(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
+ dst.SetUint(vform, lane, src.Uint(vform, lane));
+ }
+ return dst;
+}
+
+
+LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
+ // Avoid a copy if the registers already alias.
+ if (dst.Aliases(src)) return dst;
+
+ for (int i = 0; i < dst.GetChunkCount(); i++) {
+ dst.SetChunk(i, src.GetChunk(i));
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::mov_merging(VectorFormat vform,
+ LogicVRegister dst,
+ const SimPRegister& pg,
+ const LogicVRegister& src) {
+ return sel(vform, dst, pg, src, dst);
+}
+
+
+LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
+ LogicVRegister dst,
+ const SimPRegister& pg,
+ const LogicVRegister& src) {
+ SimVRegister zero;
+ dup_immediate(vform, zero, 0);
+ return sel(vform, dst, pg, src, zero);
+}
+
+
+LogicPRegister Simulator::mov_merging(LogicPRegister dst,
+ const LogicPRegister& pg,
+ const LogicPRegister& src) {
+ return sel(dst, pg, src, dst);
+}
+
+
+LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
+ const LogicPRegister& pg,
+ const LogicPRegister& src) {
+ SimPRegister all_false;
+ return sel(dst, pg, src, pfalse(all_false));
+}
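+// Usage sketch for the predicated vector moves above (illustrative; `zd`, `zn`
+// and `pg` stand for any caller-supplied destination, source and governing
+// predicate):
+//
+//   mov_merging(vform, zd, pg, zn);  // inactive lanes keep zd's old value
+//   mov_zeroing(vform, zd, pg, zn);  // inactive lanes are cleared to zero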
+
+
LogicVRegister Simulator::movi(VectorFormat vform,
LogicVRegister dst,
uint64_t imm) {
- int laneCount = LaneCountFromFormat(vform);
+ int lane_count = LaneCountFromFormat(vform);
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, imm);
}
return dst;
@@ -2488,9 +3049,9 @@ LogicVRegister Simulator::movi(VectorFormat vform,
LogicVRegister Simulator::mvni(VectorFormat vform,
LogicVRegister dst,
uint64_t imm) {
- int laneCount = LaneCountFromFormat(vform);
+ int lane_count = LaneCountFromFormat(vform);
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, ~imm);
}
return dst;
@@ -2502,12 +3063,12 @@ LogicVRegister Simulator::orr(VectorFormat vform,
const LogicVRegister& src,
uint64_t imm) {
uint64_t result[16];
- int laneCount = LaneCountFromFormat(vform);
- for (int i = 0; i < laneCount; ++i) {
+ int lane_count = LaneCountFromFormat(vform);
+ for (int i = 0; i < lane_count; ++i) {
result[i] = src.Uint(vform, i) | imm;
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
@@ -2568,6 +3129,37 @@ LogicVRegister Simulator::sxtl2(VectorFormat vform,
}
+LogicVRegister Simulator::uxt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ unsigned from_size_in_bits) {
+ int lane_count = LaneCountFromFormat(vform);
+ uint64_t mask = GetUintMask(from_size_in_bits);
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < lane_count; i++) {
+ dst.SetInt(vform, i, src.Uint(vform, i) & mask);
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::sxt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ unsigned from_size_in_bits) {
+ int lane_count = LaneCountFromFormat(vform);
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < lane_count; i++) {
+ uint64_t value =
+ ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
+ dst.SetInt(vform, i, value);
+ }
+ return dst;
+}
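+// uxt/sxt worked example (illustrative): extending from 8 bits, a lane holding
+// 0x80 becomes 128 under uxt but -128 (sign-extended) under sxt.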
+
+
LogicVRegister Simulator::shrn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
@@ -2615,6 +3207,22 @@ LogicVRegister Simulator::rshrn2(VectorFormat vform,
return extractnarrow(vformdst, dst, false, shifted_src, false);
}
+LogicVRegister Simulator::Table(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& tab,
+ const LogicVRegister& ind) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ int lane_count = LaneCountFromFormat(vform);
+ for (int i = 0; i < lane_count; i++) {
+ uint64_t index = ind.Uint(vform, i);
+ uint64_t value = (index >= static_cast<uint64_t>(lane_count))
+ ? 0
+ : tab.Uint(vform, static_cast<int>(index));
+ dst.SetUint(vform, i, value);
+ }
+ return dst;
+}
+
LogicVRegister Simulator::Table(VectorFormat vform,
LogicVRegister dst,
@@ -3182,7 +3790,7 @@ LogicVRegister Simulator::umlsl(VectorFormat vform,
SimVRegister temp1, temp2;
uxtl(vform, temp1, src1);
uxtl(vform, temp2, src2);
- mls(vform, dst, temp1, temp2);
+ mls(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3194,7 +3802,7 @@ LogicVRegister Simulator::umlsl2(VectorFormat vform,
SimVRegister temp1, temp2;
uxtl2(vform, temp1, src1);
uxtl2(vform, temp2, src2);
- mls(vform, dst, temp1, temp2);
+ mls(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3206,7 +3814,7 @@ LogicVRegister Simulator::smlsl(VectorFormat vform,
SimVRegister temp1, temp2;
sxtl(vform, temp1, src1);
sxtl(vform, temp2, src2);
- mls(vform, dst, temp1, temp2);
+ mls(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3218,7 +3826,7 @@ LogicVRegister Simulator::smlsl2(VectorFormat vform,
SimVRegister temp1, temp2;
sxtl2(vform, temp1, src1);
sxtl2(vform, temp2, src2);
- mls(vform, dst, temp1, temp2);
+ mls(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3230,7 +3838,7 @@ LogicVRegister Simulator::umlal(VectorFormat vform,
SimVRegister temp1, temp2;
uxtl(vform, temp1, src1);
uxtl(vform, temp2, src2);
- mla(vform, dst, temp1, temp2);
+ mla(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3242,7 +3850,7 @@ LogicVRegister Simulator::umlal2(VectorFormat vform,
SimVRegister temp1, temp2;
uxtl2(vform, temp1, src1);
uxtl2(vform, temp2, src2);
- mla(vform, dst, temp1, temp2);
+ mla(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3254,7 +3862,7 @@ LogicVRegister Simulator::smlal(VectorFormat vform,
SimVRegister temp1, temp2;
sxtl(vform, temp1, src1);
sxtl(vform, temp2, src2);
- mla(vform, dst, temp1, temp2);
+ mla(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3266,7 +3874,7 @@ LogicVRegister Simulator::smlal2(VectorFormat vform,
SimVRegister temp1, temp2;
sxtl2(vform, temp1, src1);
sxtl2(vform, temp2, src2);
- mla(vform, dst, temp1, temp2);
+ mla(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3371,7 +3979,7 @@ LogicVRegister Simulator::dot(VectorFormat vform,
dst.ClearForWrite(vform);
for (int e = 0; e < LaneCountFromFormat(vform); e++) {
- int64_t result = 0;
+ uint64_t result = 0;
int64_t element1, element2;
for (int i = 0; i < 4; i++) {
int index = 4 * e + i;
@@ -3384,9 +3992,7 @@ LogicVRegister Simulator::dot(VectorFormat vform,
}
result += element1 * element2;
}
-
- result += dst.Int(vform, e);
- dst.SetInt(vform, e, result);
+ dst.SetUint(vform, e, result + dst.Uint(vform, e));
}
return dst;
}
@@ -3564,16 +4170,16 @@ LogicVRegister Simulator::trn1(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- uint64_t result[16];
- int laneCount = LaneCountFromFormat(vform);
- int pairs = laneCount / 2;
+ uint64_t result[kZRegMaxSizeInBytes];
+ int lane_count = LaneCountFromFormat(vform);
+ int pairs = lane_count / 2;
for (int i = 0; i < pairs; ++i) {
result[2 * i] = src1.Uint(vform, 2 * i);
result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
@@ -3584,16 +4190,16 @@ LogicVRegister Simulator::trn2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- uint64_t result[16];
- int laneCount = LaneCountFromFormat(vform);
- int pairs = laneCount / 2;
+ uint64_t result[kZRegMaxSizeInBytes];
+ int lane_count = LaneCountFromFormat(vform);
+ int pairs = lane_count / 2;
for (int i = 0; i < pairs; ++i) {
result[2 * i] = src1.Uint(vform, (2 * i) + 1);
result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
@@ -3604,16 +4210,16 @@ LogicVRegister Simulator::zip1(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- uint64_t result[16];
- int laneCount = LaneCountFromFormat(vform);
- int pairs = laneCount / 2;
+ uint64_t result[kZRegMaxSizeInBytes];
+ int lane_count = LaneCountFromFormat(vform);
+ int pairs = lane_count / 2;
for (int i = 0; i < pairs; ++i) {
result[2 * i] = src1.Uint(vform, i);
result[(2 * i) + 1] = src2.Uint(vform, i);
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
@@ -3624,16 +4230,16 @@ LogicVRegister Simulator::zip2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- uint64_t result[16];
- int laneCount = LaneCountFromFormat(vform);
- int pairs = laneCount / 2;
+ uint64_t result[kZRegMaxSizeInBytes];
+ int lane_count = LaneCountFromFormat(vform);
+ int pairs = lane_count / 2;
for (int i = 0; i < pairs; ++i) {
result[2 * i] = src1.Uint(vform, pairs + i);
result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[i]);
}
return dst;
@@ -3644,15 +4250,15 @@ LogicVRegister Simulator::uzp1(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- uint64_t result[32];
- int laneCount = LaneCountFromFormat(vform);
- for (int i = 0; i < laneCount; ++i) {
+ uint64_t result[kZRegMaxSizeInBytes * 2];
+ int lane_count = LaneCountFromFormat(vform);
+ for (int i = 0; i < lane_count; ++i) {
result[i] = src1.Uint(vform, i);
- result[laneCount + i] = src2.Uint(vform, i);
+ result[lane_count + i] = src2.Uint(vform, i);
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[2 * i]);
}
return dst;
@@ -3663,15 +4269,15 @@ LogicVRegister Simulator::uzp2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- uint64_t result[32];
- int laneCount = LaneCountFromFormat(vform);
- for (int i = 0; i < laneCount; ++i) {
+ uint64_t result[kZRegMaxSizeInBytes * 2];
+ int lane_count = LaneCountFromFormat(vform);
+ for (int i = 0; i < lane_count; ++i) {
result[i] = src1.Uint(vform, i);
- result[laneCount + i] = src2.Uint(vform, i);
+ result[lane_count + i] = src2.Uint(vform, i);
}
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, result[(2 * i) + 1]);
}
return dst;
@@ -4201,7 +4807,7 @@ uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
} else { \
result = OP(op1, op2); \
} \
- dst.SetFloat(i, result); \
+ dst.SetFloat(vform, i, result); \
} \
return dst; \
} \
@@ -4244,7 +4850,7 @@ LogicVRegister Simulator::frecps(VectorFormat vform,
T op1 = -src1.Float<T>(i);
T op2 = src2.Float<T>(i);
T result = FPProcessNaNs(op1, op2);
- dst.SetFloat(i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
+ dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
}
return dst;
}
@@ -4276,7 +4882,7 @@ LogicVRegister Simulator::frsqrts(VectorFormat vform,
T op1 = -src1.Float<T>(i);
T op2 = src2.Float<T>(i);
T result = FPProcessNaNs(op1, op2);
- dst.SetFloat(i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
+ dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
}
return dst;
}
@@ -4309,29 +4915,41 @@ LogicVRegister Simulator::fcmp(VectorFormat vform,
bool result = false;
T op1 = src1.Float<T>(i);
T op2 = src2.Float<T>(i);
- T nan_result = FPProcessNaNs(op1, op2);
- if (!IsNaN(nan_result)) {
- switch (cond) {
- case eq:
- result = (op1 == op2);
- break;
- case ge:
- result = (op1 >= op2);
- break;
- case gt:
- result = (op1 > op2);
- break;
- case le:
- result = (op1 <= op2);
- break;
- case lt:
- result = (op1 < op2);
- break;
- default:
- VIXL_UNREACHABLE();
- break;
- }
+ bool unordered = IsNaN(FPProcessNaNs(op1, op2));
+
+ switch (cond) {
+ case eq:
+ result = (op1 == op2);
+ break;
+ case ge:
+ result = (op1 >= op2);
+ break;
+ case gt:
+ result = (op1 > op2);
+ break;
+ case le:
+ result = (op1 <= op2);
+ break;
+ case lt:
+ result = (op1 < op2);
+ break;
+ case ne:
+ result = (op1 != op2);
+ break;
+ case uo:
+ result = unordered;
+ break;
+ default:
+ // Other conditions are defined in terms of those above.
+ VIXL_UNREACHABLE();
+ break;
+ }
+
+ if (result && unordered) {
+ // Only `uo` and `ne` can be true for unordered comparisons.
+ VIXL_ASSERT((cond == uo) || (cond == ne));
}
+
dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
}
return dst;
@@ -4403,15 +5021,16 @@ LogicVRegister Simulator::fabscmp(VectorFormat vform,
template <typename T>
LogicVRegister Simulator::fmla(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2) {
dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
T op1 = src1.Float<T>(i);
T op2 = src2.Float<T>(i);
- T acc = dst.Float<T>(i);
+ T acc = srca.Float<T>(i);
T result = FPMulAdd(acc, op1, op2);
- dst.SetFloat(i, result);
+ dst.SetFloat(vform, i, result);
}
return dst;
}
@@ -4419,15 +5038,16 @@ LogicVRegister Simulator::fmla(VectorFormat vform,
LogicVRegister Simulator::fmla(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2) {
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- fmla<SimFloat16>(vform, dst, src1, src2);
+ fmla<SimFloat16>(vform, dst, srca, src1, src2);
} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
- fmla<float>(vform, dst, src1, src2);
+ fmla<float>(vform, dst, srca, src1, src2);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
- fmla<double>(vform, dst, src1, src2);
+ fmla<double>(vform, dst, srca, src1, src2);
}
return dst;
}
@@ -4436,13 +5056,14 @@ LogicVRegister Simulator::fmla(VectorFormat vform,
template <typename T>
LogicVRegister Simulator::fmls(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2) {
dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
T op1 = -src1.Float<T>(i);
T op2 = src2.Float<T>(i);
- T acc = dst.Float<T>(i);
+ T acc = srca.Float<T>(i);
T result = FPMulAdd(acc, op1, op2);
dst.SetFloat(i, result);
}
@@ -4452,15 +5073,16 @@ LogicVRegister Simulator::fmls(VectorFormat vform,
LogicVRegister Simulator::fmls(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2) {
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- fmls<SimFloat16>(vform, dst, src1, src2);
+ fmls<SimFloat16>(vform, dst, srca, src1, src2);
} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
- fmls<float>(vform, dst, src1, src2);
+ fmls<float>(vform, dst, srca, src1, src2);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
- fmls<double>(vform, dst, src1, src2);
+ fmls<double>(vform, dst, srca, src1, src2);
}
return dst;
}
@@ -4740,75 +5362,131 @@ NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
template <typename T>
-LogicVRegister Simulator::fminmaxv(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src,
- typename TFPMinMaxOp<T>::type Op) {
- VIXL_ASSERT((vform == kFormat4H) || (vform == kFormat8H) ||
- (vform == kFormat4S));
- USE(vform);
- T result1 = (this->*Op)(src.Float<T>(0), src.Float<T>(1));
- T result2 = (this->*Op)(src.Float<T>(2), src.Float<T>(3));
- if (vform == kFormat8H) {
- T result3 = (this->*Op)(src.Float<T>(4), src.Float<T>(5));
- T result4 = (this->*Op)(src.Float<T>(6), src.Float<T>(7));
- result1 = (this->*Op)(result1, result3);
- result2 = (this->*Op)(result2, result4);
- }
- T result = (this->*Op)(result1, result2);
+LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ typename TFPPairOp<T>::type fn,
+ uint64_t inactive_value) {
+ int lane_count = LaneCountFromFormat(vform);
+ T result[kZRegMaxSizeInBytes / sizeof(T)];
+ // Copy the source vector into a working array. Initialise the unused elements
+ // at the end of the array to the same value that a false predicate would set.
+ for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
+ result[i] = (i < lane_count)
+ ? src.Float<T>(i)
+ : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
+ }
+
+ // Pairwise reduce the elements to a single value, using the pair op function
+ // argument.
+ for (int step = 1; step < lane_count; step *= 2) {
+ for (int i = 0; i < lane_count; i += step * 2) {
+ result[i] = (this->*fn)(result[i], result[i + step]);
+ }
+ }
dst.ClearForWrite(ScalarFormatFromFormat(vform));
- dst.SetFloat<T>(0, result);
- return dst;
+ dst.SetFloat<T>(0, result[0]);
+ return dst;
+}
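+// Reduction sketch (illustrative): for a six-lane vector, step 1 combines lane
+// pairs (0,1), (2,3) and (4,5); step 2 combines (0,2) and (4,6), where entry 6
+// is part of the padded tail; step 4 combines (0,4). Padding with the
+// operation's identity (0 for faddv, -infinity for fmaxv, and so on, as passed
+// by the callers below) keeps non-power-of-two lane counts correct.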
+
+LogicVRegister Simulator::FPPairedAcrossHelper(
+ VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ typename TFPPairOp<SimFloat16>::type fn16,
+ typename TFPPairOp<float>::type fn32,
+ typename TFPPairOp<double>::type fn64,
+ uint64_t inactive_value) {
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case kHRegSize:
+ return FPPairedAcrossHelper<SimFloat16>(vform,
+ dst,
+ src,
+ fn16,
+ inactive_value);
+ case kSRegSize:
+ return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
+ default:
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ return FPPairedAcrossHelper<double>(vform,
+ dst,
+ src,
+ fn64,
+ inactive_value);
+ }
}
+LogicVRegister Simulator::faddv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ return FPPairedAcrossHelper(vform,
+ dst,
+ src,
+ &Simulator::FPAdd<SimFloat16>,
+ &Simulator::FPAdd<float>,
+ &Simulator::FPAdd<double>,
+ 0);
+}
LogicVRegister Simulator::fmaxv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMax<SimFloat16>);
- } else {
- return fminmaxv<float>(vform, dst, src, &Simulator::FPMax<float>);
- }
+ int lane_size = LaneSizeInBitsFromFormat(vform);
+ uint64_t inactive_value =
+ FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
+ return FPPairedAcrossHelper(vform,
+ dst,
+ src,
+ &Simulator::FPMax<SimFloat16>,
+ &Simulator::FPMax<float>,
+ &Simulator::FPMax<double>,
+ inactive_value);
}
LogicVRegister Simulator::fminv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMin<SimFloat16>);
- } else {
- return fminmaxv<float>(vform, dst, src, &Simulator::FPMin<float>);
- }
+ int lane_size = LaneSizeInBitsFromFormat(vform);
+ uint64_t inactive_value =
+ FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
+ return FPPairedAcrossHelper(vform,
+ dst,
+ src,
+ &Simulator::FPMin<SimFloat16>,
+ &Simulator::FPMin<float>,
+ &Simulator::FPMin<double>,
+ inactive_value);
}
LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- return fminmaxv<SimFloat16>(vform,
- dst,
- src,
- &Simulator::FPMaxNM<SimFloat16>);
- } else {
- return fminmaxv<float>(vform, dst, src, &Simulator::FPMaxNM<float>);
- }
+ int lane_size = LaneSizeInBitsFromFormat(vform);
+ uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
+ return FPPairedAcrossHelper(vform,
+ dst,
+ src,
+ &Simulator::FPMaxNM<SimFloat16>,
+ &Simulator::FPMaxNM<float>,
+ &Simulator::FPMaxNM<double>,
+ inactive_value);
}
LogicVRegister Simulator::fminnmv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- return fminmaxv<SimFloat16>(vform,
- dst,
- src,
- &Simulator::FPMinNM<SimFloat16>);
- } else {
- return fminmaxv<float>(vform, dst, src, &Simulator::FPMinNM<float>);
- }
+ int lane_size = LaneSizeInBitsFromFormat(vform);
+ uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
+ return FPPairedAcrossHelper(vform,
+ dst,
+ src,
+ &Simulator::FPMinNM<SimFloat16>,
+ &Simulator::FPMinNM<float>,
+ &Simulator::FPMinNM<double>,
+ inactive_value);
}
@@ -4843,14 +5521,14 @@ LogicVRegister Simulator::fmla(VectorFormat vform,
SimVRegister temp;
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
- fmla<SimFloat16>(vform, dst, src1, index_reg);
+ fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
- fmla<float>(vform, dst, src1, index_reg);
+ fmla<float>(vform, dst, dst, src1, index_reg);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
- fmla<double>(vform, dst, src1, index_reg);
+ fmla<double>(vform, dst, dst, src1, index_reg);
}
return dst;
}
@@ -4865,14 +5543,14 @@ LogicVRegister Simulator::fmls(VectorFormat vform,
SimVRegister temp;
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
- fmls<SimFloat16>(vform, dst, src1, index_reg);
+ fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
- fmls<float>(vform, dst, src1, index_reg);
+ fmls<float>(vform, dst, dst, src1, index_reg);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
- fmls<double>(vform, dst, src1, index_reg);
+ fmls<double>(vform, dst, dst, src1, index_reg);
}
return dst;
}
@@ -4941,62 +5619,142 @@ LogicVRegister Simulator::frint(VectorFormat vform,
return dst;
}
+LogicVRegister Simulator::fcvt(VectorFormat vform,
+ unsigned dst_data_size_in_bits,
+ unsigned src_data_size_in_bits,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src) {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
+ 0,
+ src.Uint(vform, i));
+ double dst_value =
+ RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
+
+ uint64_t dst_raw_bits =
+ FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
+
+ dst.SetUint(vform, i, dst_raw_bits);
+ }
+
+ return dst;
+}
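+// Note: the conversion goes through a double intermediate. Half- and
+// single-precision sources widen to double exactly, so the only rounding step
+// is the final narrowing to the destination size.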
LogicVRegister Simulator::fcvts(VectorFormat vform,
+ unsigned dst_data_size_in_bits,
+ unsigned src_data_size_in_bits,
LogicVRegister dst,
+ const LogicPRegister& pg,
const LogicVRegister& src,
- FPRounding rounding_mode,
+ FPRounding round,
int fbits) {
- dst.ClearForWrite(vform);
- if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- SimFloat16 op =
- static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits);
- dst.SetInt(vform, i, FPToInt16(op, rounding_mode));
- }
- } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
- for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- float op = src.Float<float>(i) * std::pow(2.0f, fbits);
- dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
- }
- } else {
- VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
- for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- double op = src.Float<double>(i) * std::pow(2.0, fbits);
- dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
+ 0,
+ src.Uint(vform, i));
+ double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
+ std::pow(2.0, fbits);
+
+ switch (dst_data_size_in_bits) {
+ case kHRegSize:
+ dst.SetInt(vform, i, FPToInt16(result, round));
+ break;
+ case kSRegSize:
+ dst.SetInt(vform, i, FPToInt32(result, round));
+ break;
+ case kDRegSize:
+ dst.SetInt(vform, i, FPToInt64(result, round));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
}
}
+
return dst;
}
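+// Note: `fbits` requests a fixed-point result for fcvts/fcvtu. The input is
+// scaled by 2^fbits before rounding, so converting 1.5 with fbits == 4 rounds
+// 1.5 * 16 = 24.0 and stores 24, i.e. 1.5 with four fractional bits.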
+LogicVRegister Simulator::fcvts(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding round,
+ int fbits) {
+ dst.ClearForWrite(vform);
+ return fcvts(vform,
+ LaneSizeInBitsFromFormat(vform),
+ LaneSizeInBitsFromFormat(vform),
+ dst,
+ GetPTrue(),
+ src,
+ round,
+ fbits);
+}
LogicVRegister Simulator::fcvtu(VectorFormat vform,
+ unsigned dst_data_size_in_bits,
+ unsigned src_data_size_in_bits,
LogicVRegister dst,
+ const LogicPRegister& pg,
const LogicVRegister& src,
- FPRounding rounding_mode,
+ FPRounding round,
int fbits) {
- dst.ClearForWrite(vform);
- if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- SimFloat16 op =
- static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits);
- dst.SetUint(vform, i, FPToUInt16(op, rounding_mode));
- }
- } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
- for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- float op = src.Float<float>(i) * std::pow(2.0f, fbits);
- dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
- }
- } else {
- VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
- for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- double op = src.Float<double>(i) * std::pow(2.0, fbits);
- dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
+ 0,
+ src.Uint(vform, i));
+ double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
+ std::pow(2.0, fbits);
+
+ switch (dst_data_size_in_bits) {
+ case kHRegSize:
+ dst.SetUint(vform, i, FPToUInt16(result, round));
+ break;
+ case kSRegSize:
+ dst.SetUint(vform, i, FPToUInt32(result, round));
+ break;
+ case kDRegSize:
+ dst.SetUint(vform, i, FPToUInt64(result, round));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
}
}
+
return dst;
}
+LogicVRegister Simulator::fcvtu(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ FPRounding round,
+ int fbits) {
+ dst.ClearForWrite(vform);
+ return fcvtu(vform,
+ LaneSizeInBitsFromFormat(vform),
+ LaneSizeInBitsFromFormat(vform),
+ dst,
+ GetPTrue(),
+ src,
+ round,
+ fbits);
+}
LogicVRegister Simulator::fcvtl(VectorFormat vform,
LogicVRegister dst,
@@ -5208,18 +5966,18 @@ LogicVRegister Simulator::frsqrte(VectorFormat vform,
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
SimFloat16 input = src.Float<SimFloat16>(i);
- dst.SetFloat(i, FPRecipSqrtEstimate<SimFloat16>(input));
+ dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
}
} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float input = src.Float<float>(i);
- dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
+ dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
}
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
double input = src.Float<double>(i);
- dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
+ dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
}
}
return dst;
@@ -5354,18 +6112,18 @@ LogicVRegister Simulator::frecpe(VectorFormat vform,
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
SimFloat16 input = src.Float<SimFloat16>(i);
- dst.SetFloat(i, FPRecipEstimate<SimFloat16>(input, round));
+ dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
}
} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float input = src.Float<float>(i);
- dst.SetFloat(i, FPRecipEstimate<float>(input, round));
+ dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
}
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
double input = src.Float<double>(i);
- dst.SetFloat(i, FPRecipEstimate<double>(input, round));
+ dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
}
}
return dst;
@@ -5426,6 +6184,47 @@ LogicVRegister Simulator::urecpe(VectorFormat vform,
return dst;
}
+LogicPRegister Simulator::pfalse(LogicPRegister dst) {
+ dst.Clear();
+ return dst;
+}
+
+LogicPRegister Simulator::pfirst(LogicPRegister dst,
+ const LogicPRegister& pg,
+ const LogicPRegister& src) {
+ int first_pg = GetFirstActive(kFormatVnB, pg);
+ VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
+ mov(dst, src);
+ if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
+ return dst;
+}
+
+LogicPRegister Simulator::ptrue(VectorFormat vform,
+ LogicPRegister dst,
+ int pattern) {
+ int count = GetPredicateConstraintLaneCount(vform, pattern);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetActive(vform, i, i < count);
+ }
+ return dst;
+}
+
+LogicPRegister Simulator::pnext(VectorFormat vform,
+ LogicPRegister dst,
+ const LogicPRegister& pg,
+ const LogicPRegister& src) {
+ int next = GetLastActive(vform, src) + 1;
+ while (next < LaneCountFromFormat(vform)) {
+ if (pg.IsActive(vform, next)) break;
+ next++;
+ }
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetActive(vform, i, (i == next));
+ }
+ return dst;
+}
+
template <typename T>
LogicVRegister Simulator::frecpx(VectorFormat vform,
LogicVRegister dst,
@@ -5477,49 +6276,1143 @@ LogicVRegister Simulator::frecpx(VectorFormat vform,
return dst;
}
+LogicVRegister Simulator::ftsmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister maybe_neg_src1;
+
+ // The bottom bit of src2 controls the sign of the result. Use it to
+ // conditionally invert the sign of one `fmul` operand.
+ shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
+ eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
+
+ // Multiply src1 by maybe_neg_src1, which is potentially its negation. In
+ // the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
+ // rather than maybe_neg_src1, must be the first source argument.
+ fmul(vform, dst, src1, maybe_neg_src1);
+
+ return dst;
+}
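+// ftsmul worked example (illustrative): for S lanes, `shl` moves bit 0 of src2
+// into bit 31, so a src2 lane with bit 0 set turns `maybe_neg_src1` into -src1
+// and the product is -(src1 * src1); with bit 0 clear the product is
+// src1 * src1.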
+
+LogicVRegister Simulator::ftssel(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
+ uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
+ uint64_t one;
+
+ if (lane_bits == kHRegSize) {
+ one = Float16ToRawbits(Float16(1.0));
+ } else if (lane_bits == kSRegSize) {
+ one = FloatToRawbits(1.0);
+ } else {
+ VIXL_ASSERT(lane_bits == kDRegSize);
+ one = DoubleToRawbits(1.0);
+ }
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Use integer accessors for this operation, as this is a data manipulation
+ // task requiring no calculation.
+ uint64_t op = src1.Uint(vform, i);
+
+ // Only the bottom two bits of the src2 register are significant, indicating
+ // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
+ // determines the sign of the value written to dst.
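+ // For example: q=0 leaves src1 unchanged, q=1 selects 1.0, q=2 selects
+ // -src1, and q=3 selects -1.0.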
+ uint64_t q = src2.Uint(vform, i);
+ if ((q & 1) == 1) op = one;
+ if ((q & 2) == 2) op ^= sign_bit;
+
+ dst.SetUint(vform, i, op);
+ }
+
+ return dst;
+}
+
+template <typename T>
+LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ uint64_t coeff_pos,
+ uint64_t coeff_neg) {
+ SimVRegister zero;
+ dup_immediate(kFormatVnB, zero, 0);
+
+ SimVRegister cf;
+ SimVRegister cfn;
+ dup_immediate(vform, cf, coeff_pos);
+ dup_immediate(vform, cfn, coeff_neg);
+
+ // The specification requires testing the top bit of the raw value, rather
+ // than the sign of the floating point number, so use an integer comparison
+ // here.
+ SimPRegister is_neg;
+ SVEIntCompareVectorsHelper(lt,
+ vform,
+ is_neg,
+ GetPTrue(),
+ src2,
+ zero,
+ false,
+ LeaveFlags);
+ mov_merging(vform, cf, is_neg, cfn);
+
+ SimVRegister temp;
+ fabs_<T>(vform, temp, src2);
+ fmla<T>(vform, cf, cf, src1, temp);
+ mov(vform, dst, cf);
+ return dst;
+}
+
+
+LogicVRegister Simulator::ftmad(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ unsigned index) {
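+ // The first eight entries of each table (the sine series coefficients) are
+ // used when the sign bit of the src2 element is clear; the last eight (the
+ // cosine series coefficients) are used when it is set. See FTMaddHelper.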
+ static const uint64_t ftmad_coeff16[] = {0x3c00,
+ 0xb155,
+ 0x2030,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x3c00,
+ 0xb800,
+ 0x293a,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000};
+
+ static const uint64_t ftmad_coeff32[] = {0x3f800000,
+ 0xbe2aaaab,
+ 0x3c088886,
+ 0xb95008b9,
+ 0x36369d6d,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x3f800000,
+ 0xbf000000,
+ 0x3d2aaaa6,
+ 0xbab60705,
+ 0x37cd37cc,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000};
+
+ static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
+ 0xbfc5555555555543,
+ 0x3f8111111110f30c,
+ 0xbf2a01a019b92fc6,
+ 0x3ec71de351f3d22b,
+ 0xbe5ae5e2b60f7b91,
+ 0x3de5d8408868552f,
+ 0x0000000000000000,
+ 0x3ff0000000000000,
+ 0xbfe0000000000000,
+ 0x3fa5555555555536,
+ 0xbf56c16c16c13a0b,
+ 0x3efa01a019b1e8d8,
+ 0xbe927e4f7282f468,
+ 0x3e21ee96d2641b13,
+ 0xbda8f76380fbb401};
+ VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
+ VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
+ VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
+
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ FTMaddHelper<SimFloat16>(vform,
+ dst,
+ src1,
+ src2,
+ ftmad_coeff16[index],
+ ftmad_coeff16[index + 8]);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ FTMaddHelper<float>(vform,
+ dst,
+ src1,
+ src2,
+ ftmad_coeff32[index],
+ ftmad_coeff32[index + 8]);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ FTMaddHelper<double>(vform,
+ dst,
+ src1,
+ src2,
+ ftmad_coeff64[index],
+ ftmad_coeff64[index + 8]);
+ }
+ return dst;
+}
+
+LogicVRegister Simulator::fexpa(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
+ 0x005d, 0x0075, 0x008e, 0x00a8,
+ 0x00c2, 0x00dc, 0x00f8, 0x0114,
+ 0x0130, 0x014d, 0x016b, 0x0189,
+ 0x01a8, 0x01c8, 0x01e8, 0x0209,
+ 0x022b, 0x024e, 0x0271, 0x0295,
+ 0x02ba, 0x02e0, 0x0306, 0x032e,
+ 0x0356, 0x037f, 0x03a9, 0x03d4};
+
+ static const uint64_t fexpa_coeff32[] =
+ {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
+ 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
+ 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
+ 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
+ 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
+ 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
+ 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
+ 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
+ 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
+ 0x7d3e0c};
+
+ static const uint64_t fexpa_coeff64[] =
+ {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
+ 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
+ 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
+ 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
+ 0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
+ 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
+ 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
+ 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
+ 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
+ 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
+ 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
+ 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
+ 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
+ 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
+ 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
+ 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
+
+ unsigned lane_size = LaneSizeInBitsFromFormat(vform);
+ int index_highbit = 5;
+ int op_highbit, op_shift;
+ const uint64_t* fexpa_coeff;
+
+ if (lane_size == kHRegSize) {
+ index_highbit = 4;
+ VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
+ fexpa_coeff = fexpa_coeff16;
+ op_highbit = 9;
+ op_shift = 10;
+ } else if (lane_size == kSRegSize) {
+ VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
+ fexpa_coeff = fexpa_coeff32;
+ op_highbit = 13;
+ op_shift = 23;
+ } else {
+ VIXL_ASSERT(lane_size == kDRegSize);
+ VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
+ fexpa_coeff = fexpa_coeff64;
+ op_highbit = 16;
+ op_shift = 52;
+ }
+
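+ // Each result is a table entry, selected by the low bits of the input,
+ // combined with the next bits of the input placed in the exponent field.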
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t op = src.Uint(vform, i);
+ uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
+ result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+template <typename T>
+LogicVRegister Simulator::fscale(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ T two = T(2.0);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ T s1 = src1.Float<T>(i);
+ if (!IsNaN(s1)) {
+ int64_t scale = src2.Int(vform, i);
+ // TODO: this is a low-performance implementation, but it's simple and
+ // less likely to be buggy. Consider replacing it with something faster.
+
+ // Scales outside of these bounds become infinity or zero, so there's no
+ // point iterating further.
+ scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
+
+ // Compute s1 * 2 ^ scale. If scale is positive, multiply by two and
+ // decrement scale until it's zero.
+ while (scale-- > 0) {
+ s1 = FPMul(s1, two);
+ }
+
+ // If scale is negative, divide by two and increment scale until it's
+ // zero. The first loop's post-decrement left scale one below its clamped
+ // value, so pre-increment here.
+ while (++scale < 0) {
+ s1 = FPDiv(s1, two);
+ }
+ }
+ dst.SetFloat<T>(i, s1);
+ }
+ return dst;
+}
+
+LogicVRegister Simulator::fscale(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ fscale<SimFloat16>(vform, dst, src1, src2);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fscale<float>(vform, dst, src1, src2);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fscale<double>(vform, dst, src1, src2);
+ }
+ return dst;
+}
+
+LogicVRegister Simulator::scvtf(VectorFormat vform,
+ unsigned dst_data_size_in_bits,
+ unsigned src_data_size_in_bits,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src,
+ FPRounding round,
+ int fbits) {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
+ 0,
+ src.Uint(vform, i));
+
+ switch (dst_data_size_in_bits) {
+ case kHRegSize: {
+ SimFloat16 result = FixedToFloat16(value, fbits, round);
+ dst.SetUint(vform, i, Float16ToRawbits(result));
+ break;
+ }
+ case kSRegSize: {
+ float result = FixedToFloat(value, fbits, round);
+ dst.SetUint(vform, i, FloatToRawbits(result));
+ break;
+ }
+ case kDRegSize: {
+ double result = FixedToDouble(value, fbits, round);
+ dst.SetUint(vform, i, DoubleToRawbits(result));
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ }
+
+ return dst;
+}
+
LogicVRegister Simulator::scvtf(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int fbits,
FPRounding round) {
+ return scvtf(vform,
+ LaneSizeInBitsFromFormat(vform),
+ LaneSizeInBitsFromFormat(vform),
+ dst,
+ GetPTrue(),
+ src,
+ round,
+ fbits);
+}
+
+LogicVRegister Simulator::ucvtf(VectorFormat vform,
+ unsigned dst_data_size_in_bits,
+ unsigned src_data_size_in_bits,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src,
+ FPRounding round,
+ int fbits) {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
+
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- SimFloat16 result = FixedToFloat16(src.Int(kFormatH, i), fbits, round);
- dst.SetFloat<SimFloat16>(i, result);
- } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
- float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
- dst.SetFloat<float>(i, result);
- } else {
- VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
- double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
- dst.SetFloat<double>(i, result);
+ if (!pg.IsActive(vform, i)) continue;
+
+ uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
+ 0,
+ src.Uint(vform, i));
+
+ switch (dst_data_size_in_bits) {
+ case kHRegSize: {
+ SimFloat16 result = UFixedToFloat16(value, fbits, round);
+ dst.SetUint(vform, i, Float16ToRawbits(result));
+ break;
+ }
+ case kSRegSize: {
+ float result = UFixedToFloat(value, fbits, round);
+ dst.SetUint(vform, i, FloatToRawbits(result));
+ break;
+ }
+ case kDRegSize: {
+ double result = UFixedToDouble(value, fbits, round);
+ dst.SetUint(vform, i, DoubleToRawbits(result));
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
}
}
+
return dst;
}
-
LogicVRegister Simulator::ucvtf(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int fbits,
FPRounding round) {
+ return ucvtf(vform,
+ LaneSizeInBitsFromFormat(vform),
+ LaneSizeInBitsFromFormat(vform),
+ dst,
+ GetPTrue(),
+ src,
+ round,
+ fbits);
+}
+
+LogicVRegister Simulator::unpk(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ UnpackType unpack_type,
+ ExtendType extend_type) {
+ VectorFormat vform_half = VectorFormatHalfWidth(vform);
+ const int lane_count = LaneCountFromFormat(vform);
+ const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
+
+ switch (extend_type) {
+ case kSignedExtend: {
+ int64_t result[kZRegMaxSizeInBytes];
+ for (int i = 0; i < lane_count; ++i) {
+ result[i] = src.Int(vform_half, i + src_start_lane);
+ }
+ for (int i = 0; i < lane_count; ++i) {
+ dst.SetInt(vform, i, result[i]);
+ }
+ break;
+ }
+ case kUnsignedExtend: {
+ uint64_t result[kZRegMaxSizeInBytes];
+ for (int i = 0; i < lane_count; ++i) {
+ result[i] = src.Uint(vform_half, i + src_start_lane);
+ }
+ for (int i = 0; i < lane_count; ++i) {
+ dst.SetUint(vform, i, result[i]);
+ }
+ break;
+ }
+ default:
+ VIXL_UNREACHABLE();
+ }
+ return dst;
+}
+
+LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
+ VectorFormat vform,
+ LogicPRegister dst,
+ const LogicPRegister& mask,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool is_wide_elements,
+ FlagsUpdate flags) {
+ for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
+ bool result = false;
+ if (mask.IsActive(vform, lane)) {
+ int64_t op1 = 0xbadbeef;
+ int64_t op2 = 0xbadbeef;
+ int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
+ switch (cond) {
+ case eq:
+ case ge:
+ case gt:
+ case lt:
+ case le:
+ case ne:
+ op1 = src1.Int(vform, lane);
+ op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
+ : src2.Int(vform, lane);
+ break;
+ case hi:
+ case hs:
+ case ls:
+ case lo:
+ op1 = src1.Uint(vform, lane);
+ op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
+ : src2.Uint(vform, lane);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ switch (cond) {
+ case eq:
+ result = (op1 == op2);
+ break;
+ case ne:
+ result = (op1 != op2);
+ break;
+ case ge:
+ result = (op1 >= op2);
+ break;
+ case gt:
+ result = (op1 > op2);
+ break;
+ case le:
+ result = (op1 <= op2);
+ break;
+ case lt:
+ result = (op1 < op2);
+ break;
+ case hs:
+ result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
+ break;
+ case hi:
+ result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
+ break;
+ case ls:
+ result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
+ break;
+ case lo:
+ result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ }
+ dst.SetActive(vform, lane, result);
+ }
+
+ if (flags == SetFlags) PredTest(vform, mask, dst);
+
+ return dst;
+}
+
+LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
+ VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool is_wide_elements) {
+ unsigned lane_size = LaneSizeInBitsFromFormat(vform);
+ VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
+
+ for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
+ int shift_src_lane = lane;
+ if (is_wide_elements) {
+ // If the shift amount comes from wide elements, select the D-sized lane
+ // which occupies the corresponding lanes of the value to be shifted.
+ shift_src_lane = (lane * lane_size) / kDRegSize;
+ }
+ uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
+
+ // Saturate shift_amount to the size of the lane that will be shifted.
+ if (shift_amount > lane_size) shift_amount = lane_size;
+
+ uint64_t value = src1.Uint(vform, lane);
+ int64_t result = ShiftOperand(lane_size,
+ value,
+ shift_op,
+ static_cast<unsigned>(shift_amount));
+ dst.SetUint(vform, lane, result);
+ }
+
+ return dst;
+}
+
+LogicVRegister Simulator::asrd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ int shift) {
+ VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
+ LaneSizeInBitsFromFormat(vform)));
+
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
- SimFloat16 result = UFixedToFloat16(src.Uint(kFormatH, i), fbits, round);
- dst.SetFloat<SimFloat16>(i, result);
- } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
- float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
- dst.SetFloat<float>(i, result);
+ int64_t value = src1.Int(vform, i);
+ if (shift <= 63) {
+ if (value < 0) {
+ // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
+ // cast to int64_t, and cannot cause signed overflow in the result.
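+ // Adding the mask makes the shift round towards zero; for example, with
+ // shift=2, -5 becomes (-5 + 3) >> 2 = -1, matching -5 / 4.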
+ value = value + GetUintMask(shift);
+ }
+ value = ShiftOperand(kDRegSize, value, ASR, shift);
} else {
- VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
- double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
- dst.SetFloat<double>(i, result);
+ value = 0;
}
+ dst.SetInt(vform, i, value);
}
return dst;
}
+LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
+ LogicalOp logical_op,
+ VectorFormat vform,
+ LogicVRegister zd,
+ const LogicVRegister& zn,
+ const LogicVRegister& zm) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t op1 = zn.Uint(vform, i);
+ uint64_t op2 = zm.Uint(vform, i);
+ uint64_t result;
+ switch (logical_op) {
+ case AND:
+ result = op1 & op2;
+ break;
+ case BIC:
+ result = op1 & ~op2;
+ break;
+ case EOR:
+ result = op1 ^ op2;
+ break;
+ case ORR:
+ result = op1 | op2;
+ break;
+ default:
+ result = 0;
+ VIXL_UNIMPLEMENTED();
+ }
+ zd.SetUint(vform, i, result);
+ }
+
+ return zd;
+}
+
+LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
+ LogicPRegister pd,
+ const LogicPRegister& pn,
+ const LogicPRegister& pm) {
+ for (int i = 0; i < pn.GetChunkCount(); i++) {
+ LogicPRegister::ChunkType op1 = pn.GetChunk(i);
+ LogicPRegister::ChunkType op2 = pm.GetChunk(i);
+ LogicPRegister::ChunkType result;
+ switch (op) {
+ case ANDS_p_p_pp_z:
+ case AND_p_p_pp_z:
+ result = op1 & op2;
+ break;
+ case BICS_p_p_pp_z:
+ case BIC_p_p_pp_z:
+ result = op1 & ~op2;
+ break;
+ case EORS_p_p_pp_z:
+ case EOR_p_p_pp_z:
+ result = op1 ^ op2;
+ break;
+ case NANDS_p_p_pp_z:
+ case NAND_p_p_pp_z:
+ result = ~(op1 & op2);
+ break;
+ case NORS_p_p_pp_z:
+ case NOR_p_p_pp_z:
+ result = ~(op1 | op2);
+ break;
+ case ORNS_p_p_pp_z:
+ case ORN_p_p_pp_z:
+ result = op1 | ~op2;
+ break;
+ case ORRS_p_p_pp_z:
+ case ORR_p_p_pp_z:
+ result = op1 | op2;
+ break;
+ default:
+ result = 0;
+ VIXL_UNIMPLEMENTED();
+ }
+ pd.SetChunk(i, result);
+ }
+ return pd;
+}
+
+LogicVRegister Simulator::SVEBitwiseImmHelper(
+ SVEBitwiseLogicalWithImm_UnpredicatedOp op,
+ VectorFormat vform,
+ LogicVRegister zd,
+ uint64_t imm) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t op1 = zd.Uint(vform, i);
+ uint64_t result;
+ switch (op) {
+ case AND_z_zi:
+ result = op1 & imm;
+ break;
+ case EOR_z_zi:
+ result = op1 ^ imm;
+ break;
+ case ORR_z_zi:
+ result = op1 | imm;
+ break;
+ default:
+ result = 0;
+ VIXL_UNIMPLEMENTED();
+ }
+ zd.SetUint(vform, i, result);
+ }
+
+ return zd;
+}
+
+void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
+ const LogicPRegister& pg,
+ unsigned zt_code,
+ const LogicSVEAddressVector& addr) {
+ VIXL_ASSERT(zt_code < kNumberOfZRegisters);
+
+ int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
+ int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
+ int msize_in_bytes = addr.GetMsizeInBytes();
+ int reg_count = addr.GetRegCount();
+
+ VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
+ VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
+
+ unsigned zt_codes[4] = {zt_code,
+ (zt_code + 1) % kNumberOfZRegisters,
+ (zt_code + 2) % kNumberOfZRegisters,
+ (zt_code + 3) % kNumberOfZRegisters};
+
+ LogicVRegister zt[4] = {
+ ReadVRegister(zt_codes[0]),
+ ReadVRegister(zt_codes[1]),
+ ReadVRegister(zt_codes[2]),
+ ReadVRegister(zt_codes[3]),
+ };
+
+ // For unpacked forms (e.g. `st1b { z0.h }, ...`), the upper parts of the lanes
+ // are ignored, so read the source register using the VectorFormat that
+ // corresponds with the storage format, and multiply the index accordingly.
+ VectorFormat unpack_vform =
+ SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
+ int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ for (int r = 0; r < reg_count; r++) {
+ uint64_t element_address = addr.GetElementAddress(i, r);
+ zt[r].WriteUintToMem(unpack_vform, i << unpack_shift, element_address);
+ }
+ }
+
+ if (ShouldTraceWrites()) {
+ PrintRegisterFormat format = GetPrintRegisterFormat(vform);
+ if (esize_in_bytes_log2 == msize_in_bytes_log2) {
+ // Use an FP format where it's likely that we're accessing FP data.
+ format = GetPrintRegisterFormatTryFP(format);
+ }
+ // Stores don't represent a change to the source register's value, so only
+ // print the relevant part of the value.
+ format = GetPrintRegPartial(format);
+
+ PrintZStructAccess(zt_code,
+ reg_count,
+ pg,
+ format,
+ msize_in_bytes,
+ "->",
+ addr);
+ }
+}
+
+void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
+ const LogicPRegister& pg,
+ unsigned zt_code,
+ const LogicSVEAddressVector& addr,
+ bool is_signed) {
+ int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
+ int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
+ int msize_in_bytes = addr.GetMsizeInBytes();
+ int reg_count = addr.GetRegCount();
+
+ VIXL_ASSERT(zt_code < kNumberOfZRegisters);
+ VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
+ VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
+
+ unsigned zt_codes[4] = {zt_code,
+ (zt_code + 1) % kNumberOfZRegisters,
+ (zt_code + 2) % kNumberOfZRegisters,
+ (zt_code + 3) % kNumberOfZRegisters};
+ LogicVRegister zt[4] = {
+ ReadVRegister(zt_codes[0]),
+ ReadVRegister(zt_codes[1]),
+ ReadVRegister(zt_codes[2]),
+ ReadVRegister(zt_codes[3]),
+ };
+
+ VectorFormat unpack_vform =
+ SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ for (int r = 0; r < reg_count; r++) {
+ uint64_t element_address = addr.GetElementAddress(i, r);
+
+ if (!pg.IsActive(vform, i)) {
+ zt[r].SetUint(vform, i, 0);
+ continue;
+ }
+
+ if (is_signed) {
+ zt[r].ReadIntFromMem(vform,
+ LaneSizeInBitsFromFormat(unpack_vform),
+ i,
+ element_address);
+
+ } else {
+ zt[r].ReadUintFromMem(vform,
+ LaneSizeInBitsFromFormat(unpack_vform),
+ i,
+ element_address);
+ }
+ }
+ }
+
+ if (ShouldTraceVRegs()) {
+ PrintRegisterFormat format = GetPrintRegisterFormat(vform);
+ if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
+ // Use an FP format where it's likely that we're accessing FP data.
+ format = GetPrintRegisterFormatTryFP(format);
+ }
+ PrintZStructAccess(zt_code,
+ reg_count,
+ pg,
+ format,
+ msize_in_bytes,
+ "<-",
+ addr);
+ }
+}
+
+LogicPRegister Simulator::brka(LogicPRegister pd,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn) {
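+ // "Break after": lanes governed by pg are set up to and including the first
+ // lane where pn is active, and cleared after that; ungoverned lanes are left
+ // unchanged.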
+ bool break_ = false;
+ for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
+ if (pg.IsActive(kFormatVnB, i)) {
+ pd.SetActive(kFormatVnB, i, !break_);
+ break_ |= pn.IsActive(kFormatVnB, i);
+ }
+ }
+
+ return pd;
+}
+
+LogicPRegister Simulator::brkb(LogicPRegister pd,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn) {
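+ // "Break before": as brka, but the first lane where pn is active is itself
+ // cleared, along with all later governed lanes.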
+ bool break_ = false;
+ for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
+ if (pg.IsActive(kFormatVnB, i)) {
+ break_ |= pn.IsActive(kFormatVnB, i);
+ pd.SetActive(kFormatVnB, i, !break_);
+ }
+ }
+
+ return pd;
+}
+
+LogicPRegister Simulator::brkn(LogicPRegister pdm,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn) {
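+ // If pn is inactive at the last lane governed by pg, clear the whole result;
+ // otherwise pdm is returned unchanged.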
+ if (!IsLastActive(kFormatVnB, pg, pn)) {
+ pfalse(pdm);
+ }
+ return pdm;
+}
+
+LogicPRegister Simulator::brkpa(LogicPRegister pd,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn,
+ const LogicPRegister& pm) {
+ bool last_active = IsLastActive(kFormatVnB, pg, pn);
+
+ for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
+ bool active = false;
+ if (pg.IsActive(kFormatVnB, i)) {
+ active = last_active;
+ last_active = last_active && !pm.IsActive(kFormatVnB, i);
+ }
+ pd.SetActive(kFormatVnB, i, active);
+ }
+
+ return pd;
+}
+
+LogicPRegister Simulator::brkpb(LogicPRegister pd,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn,
+ const LogicPRegister& pm) {
+ bool last_active = IsLastActive(kFormatVnB, pg, pn);
+
+ for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
+ bool active = false;
+ if (pg.IsActive(kFormatVnB, i)) {
+ last_active = last_active && !pm.IsActive(kFormatVnB, i);
+ active = last_active;
+ }
+ pd.SetActive(kFormatVnB, i, active);
+ }
+
+ return pd;
+}
+
+void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
+ const LogicPRegister& pg,
+ unsigned zt_code,
+ const LogicSVEAddressVector& addr,
+ SVEFaultTolerantLoadType type,
+ bool is_signed) {
+ int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
+ int msize_in_bits = addr.GetMsizeInBits();
+ int msize_in_bytes = addr.GetMsizeInBytes();
+
+ VIXL_ASSERT(zt_code < kNumberOfZRegisters);
+ VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
+ VIXL_ASSERT(addr.GetRegCount() == 1);
+
+ LogicVRegister zt = ReadVRegister(zt_code);
+ LogicPRegister ffr = ReadFFR();
+
+ // Non-faulting loads are allowed to fail arbitrarily. To stress user
+ // code, fail a random element in roughly one in eight full-vector loads.
+ uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
+ int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t value = 0;
+
+ if (pg.IsActive(vform, i)) {
+ uint64_t element_address = addr.GetElementAddress(i, 0);
+
+ if (type == kSVEFirstFaultLoad) {
+ // First-faulting loads always load the first active element, regardless
+ // of FFR. The result will be discarded if its FFR lane is inactive, but
+ // it could still generate a fault.
+ value = Memory::Read(msize_in_bytes, element_address);
+ // All subsequent elements have non-fault semantics.
+ type = kSVENonFaultLoad;
+
+ } else if (ffr.IsActive(vform, i)) {
+ // Simulation of fault-tolerant loads relies on system calls, and is
+ // likely to be relatively slow, so we only actually perform the load if
+ // its FFR lane is active.
+
+ bool can_read = (i < fake_fault_at_lane) &&
+ CanReadMemory(element_address, msize_in_bytes);
+ if (can_read) {
+ value = Memory::Read(msize_in_bytes, element_address);
+ } else {
+ // Propagate the fault to the end of FFR.
+ for (int j = i; j < LaneCountFromFormat(vform); j++) {
+ ffr.SetActive(vform, j, false);
+ }
+ }
+ }
+ }
+
+ // The architecture permits a few possible results for inactive FFR lanes
+ // (including those caused by a fault in this instruction). We choose to
+ // leave the register value unchanged (like merging predication) because
+ // no other input to this instruction can have the same behaviour.
+ //
+ // Note that this behaviour takes precedence over pg's zeroing predication.
+
+ if (ffr.IsActive(vform, i)) {
+ int msb = msize_in_bits - 1;
+ if (is_signed) {
+ zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
+ } else {
+ zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
+ }
+ }
+ }
+
+ if (ShouldTraceVRegs()) {
+ PrintRegisterFormat format = GetPrintRegisterFormat(vform);
+ if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
+ // Use an FP format where it's likely that we're accessing FP data.
+ format = GetPrintRegisterFormatTryFP(format);
+ }
+ // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
+ // expects a single mask, so combine the two predicates.
+ SimPRegister mask;
+ SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
+ PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
+ }
+}
+
+void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
+ VectorFormat vform,
+ SVEOffsetModifier mod) {
+ bool is_signed = instr->ExtractBit(14) == 0;
+ bool is_ff = instr->ExtractBit(13) == 1;
+ // Note that these instructions don't use the Dtype encoding.
+ int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
+ int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
+ uint64_t base = ReadXRegister(instr->GetRn());
+ LogicSVEAddressVector addr(base,
+ &ReadVRegister(instr->GetRm()),
+ vform,
+ mod,
+ scale);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ if (is_ff) {
+ SVEFaultTolerantLoadHelper(vform,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr,
+ kSVEFirstFaultLoad,
+ is_signed);
+ } else {
+ SVEStructuredLoadHelper(vform,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr,
+ is_signed);
+ }
+}
+
+int Simulator::GetFirstActive(VectorFormat vform,
+ const LogicPRegister& pg) const {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (pg.IsActive(vform, i)) return i;
+ }
+ return -1;
+}
+
+int Simulator::GetLastActive(VectorFormat vform,
+ const LogicPRegister& pg) const {
+ for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
+ if (pg.IsActive(vform, i)) return i;
+ }
+ return -1;
+}
+
+int Simulator::CountActiveLanes(VectorFormat vform,
+ const LogicPRegister& pg) const {
+ int count = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ count += pg.IsActive(vform, i) ? 1 : 0;
+ }
+ return count;
+}
+
+int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn) const {
+ int count = 0;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
+ }
+ return count;
+}
+
+int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
+ int pattern) const {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ int all = LaneCountFromFormat(vform);
+ VIXL_ASSERT(all > 0);
+
+ switch (pattern) {
+ case SVE_VL1:
+ case SVE_VL2:
+ case SVE_VL3:
+ case SVE_VL4:
+ case SVE_VL5:
+ case SVE_VL6:
+ case SVE_VL7:
+ case SVE_VL8:
+ // VL1-VL8 are encoded directly.
+ VIXL_STATIC_ASSERT(SVE_VL1 == 1);
+ VIXL_STATIC_ASSERT(SVE_VL8 == 8);
+ return (pattern <= all) ? pattern : 0;
+ case SVE_VL16:
+ case SVE_VL32:
+ case SVE_VL64:
+ case SVE_VL128:
+ case SVE_VL256: {
+ // VL16-VL256 are encoded as log2(N) + c.
+ int min = 16 << (pattern - SVE_VL16);
+ return (min <= all) ? min : 0;
+ }
+ // Special cases.
+ case SVE_POW2:
+ return 1 << HighestSetBitPosition(all);
+ case SVE_MUL4:
+ return all - (all % 4);
+ case SVE_MUL3:
+ return all - (all % 3);
+ case SVE_ALL:
+ return all;
+ }
+ // Unnamed cases architecturally return 0.
+ return 0;
+}
+
+uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
+ if (IsContiguous()) {
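+ // Contiguous structured accesses interleave the registers, so the structure
+ // for lane N starts (N * reg_count) elements beyond the base address.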
+ return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
+ }
+
+ VIXL_ASSERT(IsScatterGather());
+ VIXL_ASSERT(vector_ != NULL);
+
+ // For scatter-gather accesses, we need to extract the offset from vector_,
+ // and apply modifiers.
+
+ uint64_t offset = 0;
+ switch (vector_form_) {
+ case kFormatVnS:
+ offset = vector_->GetLane<uint32_t>(lane);
+ break;
+ case kFormatVnD:
+ offset = vector_->GetLane<uint64_t>(lane);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ switch (vector_mod_) {
+ case SVE_MUL_VL:
+ VIXL_UNIMPLEMENTED();
+ break;
+ case SVE_LSL:
+ // We apply the shift below. There's nothing to do here.
+ break;
+ case NO_SVE_OFFSET_MODIFIER:
+ VIXL_ASSERT(vector_shift_ == 0);
+ break;
+ case SVE_UXTW:
+ offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
+ break;
+ case SVE_SXTW:
+ offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
+ break;
+ }
+
+ return base_ + (offset << vector_shift_);
+}
+
} // namespace aarch64
} // namespace vixl
diff --git a/src/aarch64/macro-assembler-aarch64.cc b/src/aarch64/macro-assembler-aarch64.cc
index 85954fc9..56c6eaf6 100644
--- a/src/aarch64/macro-assembler-aarch64.cc
+++ b/src/aarch64/macro-assembler-aarch64.cc
@@ -65,7 +65,7 @@ LiteralPool::~LiteralPool() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
void LiteralPool::Reset() {
- std::vector<RawLiteral *>::iterator it, end;
+ std::vector<RawLiteral*>::iterator it, end;
for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
RawLiteral* literal = *it;
if (literal->deletion_policy_ == RawLiteral::kDeletedOnPlacementByPool) {
@@ -145,7 +145,7 @@ void LiteralPool::Emit(EmitOption option) {
}
// Now populate the literal pool.
- std::vector<RawLiteral *>::iterator it, end;
+ std::vector<RawLiteral*>::iterator it, end;
for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
VIXL_ASSERT((*it)->IsUsed());
masm_->place(*it);
@@ -321,11 +321,13 @@ MacroAssembler::MacroAssembler(PositionIndependentCodeOption pic)
generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
sp_(sp),
tmp_list_(ip0, ip1),
- fptmp_list_(d31),
+ v_tmp_list_(d31),
+ p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
current_scratch_scope_(NULL),
literal_pool_(this),
veneer_pool_(this),
- recommended_checkpoint_(Pool::kNoCheckpointRequired) {
+ recommended_checkpoint_(Pool::kNoCheckpointRequired),
+ fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
checkpoint_ = GetNextCheckPoint();
#ifndef VIXL_DEBUG
USE(allow_macro_instructions_);
@@ -342,11 +344,13 @@ MacroAssembler::MacroAssembler(size_t capacity,
generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
sp_(sp),
tmp_list_(ip0, ip1),
- fptmp_list_(d31),
+ v_tmp_list_(d31),
+ p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
current_scratch_scope_(NULL),
literal_pool_(this),
veneer_pool_(this),
- recommended_checkpoint_(Pool::kNoCheckpointRequired) {
+ recommended_checkpoint_(Pool::kNoCheckpointRequired),
+ fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
checkpoint_ = GetNextCheckPoint();
}
@@ -361,11 +365,13 @@ MacroAssembler::MacroAssembler(byte* buffer,
generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
sp_(sp),
tmp_list_(ip0, ip1),
- fptmp_list_(d31),
+ v_tmp_list_(d31),
+ p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
current_scratch_scope_(NULL),
literal_pool_(this),
veneer_pool_(this),
- recommended_checkpoint_(Pool::kNoCheckpointRequired) {
+ recommended_checkpoint_(Pool::kNoCheckpointRequired),
+ fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
checkpoint_ = GetNextCheckPoint();
}
@@ -819,6 +825,12 @@ void MacroAssembler::LogicalMacro(const Register& rd,
// * 1 instruction to move to sp
MacroEmissionCheckScope guard(this);
UseScratchRegisterScope temps(this);
+ // Use `rd` as a temp, if we can.
+ temps.Include(rd);
+ // We read `rn` after evaluating `operand`.
+ temps.Exclude(rn);
+ // It doesn't matter if `operand` is in `temps` (e.g. because it aliases `rd`)
+ // because we don't need it after it is evaluated.
if (operand.IsImmediate()) {
uint64_t immediate = operand.GetImmediate();
@@ -886,6 +898,7 @@ void MacroAssembler::LogicalMacro(const Register& rd,
} else {
// Immediate can't be encoded: synthesize using move immediate.
Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Aliases(rn));
// If the left-hand input is the stack pointer, we can't pre-shift the
// immediate, as the encoding won't allow the subsequent post shift.
@@ -910,8 +923,8 @@ void MacroAssembler::LogicalMacro(const Register& rd,
operand.GetRegister().Is64Bits() ||
((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));
- temps.Exclude(operand.GetRegister());
Register temp = temps.AcquireSameSizeAs(rn);
+ VIXL_ASSERT(!temp.Aliases(rn));
EmitExtendShift(temp,
operand.GetRegister(),
operand.GetExtend(),
@@ -1139,17 +1152,13 @@ void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
// Call the macro assembler for generic immediates.
Mvn(rd, operand.GetImmediate());
} else if (operand.IsExtendedRegister()) {
- UseScratchRegisterScope temps(this);
- temps.Exclude(operand.GetRegister());
-
// Emit two instructions for the extend case. This differs from Mov, as
// the extend and invert can't be achieved in one instruction.
- Register temp = temps.AcquireSameSizeAs(rd);
- EmitExtendShift(temp,
+ EmitExtendShift(rd,
operand.GetRegister(),
operand.GetExtend(),
operand.GetShiftAmount());
- mvn(rd, Operand(temp));
+ mvn(rd, rd);
} else {
// Otherwise, register and shifted register cases can be handled by the
// assembler directly, using orn.
@@ -1418,12 +1427,15 @@ void MacroAssembler::Add(const Register& rd,
const Operand& operand,
FlagsUpdate S) {
VIXL_ASSERT(allow_macro_instructions_);
- if (operand.IsImmediate() && (operand.GetImmediate() < 0) &&
- IsImmAddSub(-operand.GetImmediate())) {
- AddSubMacro(rd, rn, -operand.GetImmediate(), S, SUB);
- } else {
- AddSubMacro(rd, rn, operand, S, ADD);
+ if (operand.IsImmediate()) {
+ int64_t imm = operand.GetImmediate();
+ if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
+ IsImmAddSub(-imm)) {
+ AddSubMacro(rd, rn, -imm, S, SUB);
+ return;
+ }
}
+ AddSubMacro(rd, rn, operand, S, ADD);
}
@@ -1439,12 +1451,15 @@ void MacroAssembler::Sub(const Register& rd,
const Operand& operand,
FlagsUpdate S) {
VIXL_ASSERT(allow_macro_instructions_);
- if (operand.IsImmediate() && (operand.GetImmediate() < 0) &&
- IsImmAddSub(-operand.GetImmediate())) {
- AddSubMacro(rd, rn, -operand.GetImmediate(), S, ADD);
- } else {
- AddSubMacro(rd, rn, operand, S, SUB);
+ if (operand.IsImmediate()) {
+ int64_t imm = operand.GetImmediate();
+ if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
+ IsImmAddSub(-imm)) {
+ AddSubMacro(rd, rn, -imm, S, ADD);
+ return;
+ }
}
+ AddSubMacro(rd, rn, operand, S, SUB);
}
@@ -1757,6 +1772,12 @@ void MacroAssembler::AddSubMacro(const Register& rd,
(rn.IsZero() && !operand.IsShiftedRegister()) ||
(operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
UseScratchRegisterScope temps(this);
+ // Use `rd` as a temp, if we can.
+ temps.Include(rd);
+ // We read `rn` after evaluating `operand`.
+ temps.Exclude(rn);
+ // It doesn't matter if `operand` is in `temps` (e.g. because it aliases
+ // `rd`) because we don't need it after it is evaluated.
Register temp = temps.AcquireSameSizeAs(rn);
if (operand.IsImmediate()) {
PreShiftImmMode mode = kAnyShift;
@@ -1842,6 +1863,12 @@ void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
// * 1 instruction for add/sub
MacroEmissionCheckScope guard(this);
UseScratchRegisterScope temps(this);
+ // Use `rd` as a temp, if we can.
+ temps.Include(rd);
+ // We read `rn` after evaluating `operand`.
+ temps.Exclude(rn);
+ // It doesn't matter if `operand` is in `temps` (e.g. because it aliases `rd`)
+ // because we don't need it after it is evaluated.
if (operand.IsImmediate() ||
(operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
@@ -1856,7 +1883,6 @@ void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
VIXL_ASSERT(
IsUintN(rd.GetSizeInBits() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
operand.GetShiftAmount()));
- temps.Exclude(operand.GetRegister());
Register temp = temps.AcquireSameSizeAs(rn);
EmitShift(temp,
operand.GetRegister(),
@@ -1872,7 +1898,6 @@ void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
VIXL_ASSERT(
operand.GetRegister().Is64Bits() ||
((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));
- temps.Exclude(operand.GetRegister());
Register temp = temps.AcquireSameSizeAs(rn);
EmitExtendShift(temp,
operand.GetRegister(),
@@ -2397,7 +2422,8 @@ void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op,
// We do not handle pre-indexing or post-indexing.
VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex()));
VIXL_ASSERT(!registers.Overlaps(tmp_list_));
- VIXL_ASSERT(!registers.Overlaps(fptmp_list_));
+ VIXL_ASSERT(!registers.Overlaps(v_tmp_list_));
+ VIXL_ASSERT(!registers.Overlaps(p_tmp_list_));
VIXL_ASSERT(!registers.IncludesAliasOf(sp));
UseScratchRegisterScope temps(this);
@@ -2481,7 +2507,7 @@ void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
}
-// TODO(all): Fix printf for NEON registers.
+// TODO(all): Fix printf for NEON and SVE registers.
// This is the main Printf implementation. All callee-saved registers are
// preserved, but NZCV and the caller-saved registers may be clobbered.
@@ -2764,32 +2790,6 @@ void MacroAssembler::Log(TraceParameters parameters) {
}
-void MacroAssembler::EnableInstrumentation() {
- VIXL_ASSERT(!isprint(InstrumentStateEnable));
- ExactAssemblyScope scope(this, kInstructionSize);
- movn(xzr, InstrumentStateEnable);
-}
-
-
-void MacroAssembler::DisableInstrumentation() {
- VIXL_ASSERT(!isprint(InstrumentStateDisable));
- ExactAssemblyScope scope(this, kInstructionSize);
- movn(xzr, InstrumentStateDisable);
-}
-
-
-void MacroAssembler::AnnotateInstrumentation(const char* marker_name) {
- VIXL_ASSERT(strlen(marker_name) == 2);
-
- // We allow only printable characters in the marker names. Unprintable
- // characters are reserved for controlling features of the instrumentation.
- VIXL_ASSERT(isprint(marker_name[0]) && isprint(marker_name[1]));
-
- ExactAssemblyScope scope(this, kInstructionSize);
- movn(xzr, (marker_name[1] << 8) | marker_name[0]);
-}
-
-
void MacroAssembler::SetSimulatorCPUFeatures(const CPUFeatures& features) {
ConfigureSimulatorCPUFeaturesHelper(features, kSetCPUFeaturesOpcode);
}
@@ -2870,10 +2870,13 @@ void UseScratchRegisterScope::Open(MacroAssembler* masm) {
CPURegList* available = masm->GetScratchRegisterList();
CPURegList* available_v = masm->GetScratchVRegisterList();
+ CPURegList* available_p = masm->GetScratchPRegisterList();
old_available_ = available->GetList();
old_available_v_ = available_v->GetList();
+ old_available_p_ = available_p->GetList();
VIXL_ASSERT(available->GetType() == CPURegister::kRegister);
VIXL_ASSERT(available_v->GetType() == CPURegister::kVRegister);
+ VIXL_ASSERT(available_p->GetType() == CPURegister::kPRegister);
parent_ = masm->GetCurrentScratchRegisterScope();
masm->SetCurrentScratchRegisterScope(this);
@@ -2891,6 +2894,7 @@ void UseScratchRegisterScope::Close() {
masm_->GetScratchRegisterList()->SetList(old_available_);
masm_->GetScratchVRegisterList()->SetList(old_available_v_);
+ masm_->GetScratchPRegisterList()->SetList(old_available_p_);
masm_ = NULL;
}
@@ -2899,44 +2903,46 @@ void UseScratchRegisterScope::Close() {
bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const {
return masm_->GetScratchRegisterList()->IncludesAliasOf(reg) ||
- masm_->GetScratchVRegisterList()->IncludesAliasOf(reg);
+ masm_->GetScratchVRegisterList()->IncludesAliasOf(reg) ||
+ masm_->GetScratchPRegisterList()->IncludesAliasOf(reg);
}
-
Register UseScratchRegisterScope::AcquireRegisterOfSize(int size_in_bits) {
- int code = AcquireNextAvailable(masm_->GetScratchRegisterList()).GetCode();
+ int code = AcquireFrom(masm_->GetScratchRegisterList()).GetCode();
return Register(code, size_in_bits);
}
VRegister UseScratchRegisterScope::AcquireVRegisterOfSize(int size_in_bits) {
- int code = AcquireNextAvailable(masm_->GetScratchVRegisterList()).GetCode();
+ int code = AcquireFrom(masm_->GetScratchVRegisterList()).GetCode();
return VRegister(code, size_in_bits);
}
void UseScratchRegisterScope::Release(const CPURegister& reg) {
VIXL_ASSERT(masm_ != NULL);
- if (reg.IsRegister()) {
- ReleaseByCode(masm_->GetScratchRegisterList(), reg.GetCode());
- } else if (reg.IsVRegister()) {
- ReleaseByCode(masm_->GetScratchVRegisterList(), reg.GetCode());
- } else {
- VIXL_ASSERT(reg.IsNone());
- }
+
+ // Release(NoReg) has no effect.
+ if (reg.IsNone()) return;
+
+ ReleaseByCode(GetAvailableListFor(reg.GetBank()), reg.GetCode());
}
void UseScratchRegisterScope::Include(const CPURegList& list) {
VIXL_ASSERT(masm_ != NULL);
+
+ // Including an empty list has no effect.
+ if (list.IsEmpty()) return;
+ VIXL_ASSERT(list.GetType() != CPURegister::kNoRegister);
+
+ RegList reg_list = list.GetList();
if (list.GetType() == CPURegister::kRegister) {
// Make sure that neither sp nor xzr is included in the list.
- IncludeByRegList(masm_->GetScratchRegisterList(),
- list.GetList() & ~(xzr.GetBit() | sp.GetBit()));
- } else {
- VIXL_ASSERT(list.GetType() == CPURegister::kVRegister);
- IncludeByRegList(masm_->GetScratchVRegisterList(), list.GetList());
+ reg_list &= ~(xzr.GetBit() | sp.GetBit());
}
+
+ IncludeByRegList(GetAvailableListFor(list.GetBank()), reg_list);
}
@@ -2964,13 +2970,43 @@ void UseScratchRegisterScope::Include(const VRegister& reg1,
}
-void UseScratchRegisterScope::Exclude(const CPURegList& list) {
- if (list.GetType() == CPURegister::kRegister) {
- ExcludeByRegList(masm_->GetScratchRegisterList(), list.GetList());
- } else {
- VIXL_ASSERT(list.GetType() == CPURegister::kVRegister);
- ExcludeByRegList(masm_->GetScratchVRegisterList(), list.GetList());
+void UseScratchRegisterScope::Include(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4) {
+ RegList include = 0;
+ RegList include_v = 0;
+ RegList include_p = 0;
+
+ const CPURegister regs[] = {reg1, reg2, reg3, reg4};
+
+ for (size_t i = 0; i < ArrayLength(regs); i++) {
+ RegList bit = regs[i].GetBit();
+ switch (regs[i].GetBank()) {
+ case CPURegister::kNoRegisterBank:
+ // Include(NoReg) has no effect.
+ VIXL_ASSERT(regs[i].IsNone());
+ break;
+ case CPURegister::kRRegisterBank:
+ include |= bit;
+ break;
+ case CPURegister::kVRegisterBank:
+ include_v |= bit;
+ break;
+ case CPURegister::kPRegisterBank:
+ include_p |= bit;
+ break;
+ }
}
+
+ IncludeByRegList(masm_->GetScratchRegisterList(), include);
+ IncludeByRegList(masm_->GetScratchVRegisterList(), include_v);
+ IncludeByRegList(masm_->GetScratchPRegisterList(), include_p);
+}
+
+
+void UseScratchRegisterScope::Exclude(const CPURegList& list) {
+ ExcludeByRegList(GetAvailableListFor(list.GetBank()), list.GetList());
}
@@ -2988,9 +3024,9 @@ void UseScratchRegisterScope::Exclude(const VRegister& reg1,
const VRegister& reg2,
const VRegister& reg3,
const VRegister& reg4) {
- RegList excludefp =
+ RegList exclude_v =
reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
- ExcludeByRegList(masm_->GetScratchVRegisterList(), excludefp);
+ ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v);
}
@@ -2999,22 +3035,33 @@ void UseScratchRegisterScope::Exclude(const CPURegister& reg1,
const CPURegister& reg3,
const CPURegister& reg4) {
RegList exclude = 0;
- RegList excludefp = 0;
+ RegList exclude_v = 0;
+ RegList exclude_p = 0;
const CPURegister regs[] = {reg1, reg2, reg3, reg4};
for (size_t i = 0; i < ArrayLength(regs); i++) {
- if (regs[i].IsRegister()) {
- exclude |= regs[i].GetBit();
- } else if (regs[i].IsVRegister()) {
- excludefp |= regs[i].GetBit();
- } else {
- VIXL_ASSERT(regs[i].IsNone());
+ RegList bit = regs[i].GetBit();
+ switch (regs[i].GetBank()) {
+ case CPURegister::kNoRegisterBank:
+ // Exclude(NoReg) has no effect.
+ VIXL_ASSERT(regs[i].IsNone());
+ break;
+ case CPURegister::kRRegisterBank:
+ exclude |= bit;
+ break;
+ case CPURegister::kVRegisterBank:
+ exclude_v |= bit;
+ break;
+ case CPURegister::kPRegisterBank:
+ exclude_p |= bit;
+ break;
}
}
ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
- ExcludeByRegList(masm_->GetScratchVRegisterList(), excludefp);
+ ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v);
+ ExcludeByRegList(masm_->GetScratchPRegisterList(), exclude_p);
}
@@ -3023,13 +3070,15 @@ void UseScratchRegisterScope::ExcludeAll() {
masm_->GetScratchRegisterList()->GetList());
ExcludeByRegList(masm_->GetScratchVRegisterList(),
masm_->GetScratchVRegisterList()->GetList());
+ ExcludeByRegList(masm_->GetScratchPRegisterList(),
+ masm_->GetScratchPRegisterList()->GetList());
}
-CPURegister UseScratchRegisterScope::AcquireNextAvailable(
- CPURegList* available) {
- VIXL_CHECK(!available->IsEmpty());
- CPURegister result = available->PopLowestIndex();
+CPURegister UseScratchRegisterScope::AcquireFrom(CPURegList* available,
+ RegList mask) {
+ VIXL_CHECK((available->GetList() & mask) != 0);
+ CPURegister result = available->PopLowestIndex(mask);
VIXL_ASSERT(!AreAliased(result, xzr, sp));
return result;
}
@@ -3057,5 +3106,22 @@ void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available,
available->SetList(available->GetList() & ~exclude);
}
+CPURegList* UseScratchRegisterScope::GetAvailableListFor(
+ CPURegister::RegisterBank bank) {
+ switch (bank) {
+ case CPURegister::kNoRegisterBank:
+ return NULL;
+ case CPURegister::kRRegisterBank:
+ return masm_->GetScratchRegisterList();
+ case CPURegister::kVRegisterBank:
+ return masm_->GetScratchVRegisterList();
+ case CPURegister::kPRegisterBank:
+ return masm_->GetScratchPRegisterList();
+ }
+ VIXL_UNREACHABLE();
+ return NULL;
+}
+
} // namespace aarch64
} // namespace vixl
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 31db8dab..8becddbb 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -35,7 +35,6 @@
#include "../macro-assembler-interface.h"
#include "assembler-aarch64.h"
-#include "instrument-aarch64.h"
// Required for runtime call support.
// TODO: Break this dependency. We should be able to separate out the necessary
// parts so that we don't need to include the whole simulator header.
@@ -61,7 +60,7 @@
#define LSPAIR_MACRO_LIST(V) \
V(Ldp, CPURegister&, rt, rt2, LoadPairOpFor(rt, rt2)) \
V(Stp, CPURegister&, rt, rt2, StorePairOpFor(rt, rt2)) \
- V(Ldpsw, CPURegister&, rt, rt2, LDPSW_x)
+ V(Ldpsw, Register&, rt, rt2, LDPSW_x)
namespace vixl {
namespace aarch64 {
@@ -528,6 +527,57 @@ class MacroEmissionCheckScope : public EmissionCheckScope {
};
+// This scope simplifies the handling of the SVE `movprfx` instruction.
+//
+// If dst.Aliases(src):
+// - Start an ExactAssemblyScope(masm, kInstructionSize).
+// Otherwise:
+// - Start an ExactAssemblyScope(masm, 2 * kInstructionSize).
+// - Generate a suitable `movprfx` instruction.
+//
+// In both cases, the ExactAssemblyScope is left with enough remaining space for
+// exactly one destructive instruction.
+class MovprfxHelperScope : public ExactAssemblyScope {
+ public:
+ inline MovprfxHelperScope(MacroAssembler* masm,
+ const ZRegister& dst,
+ const ZRegister& src);
+
+ inline MovprfxHelperScope(MacroAssembler* masm,
+ const ZRegister& dst,
+ const PRegister& pg,
+ const ZRegister& src);
+
+ // TODO: Implement constructors that examine _all_ sources. If `dst` aliases
+ // any other source register, we can't use `movprfx`. This isn't obviously
+ // useful, but the MacroAssembler should not generate invalid code for it.
+ // Valid behaviour can be implemented using `mov`.
+ //
+ // The best way to handle this in an instruction-agnostic way is probably to
+ // use variadic templates.
+
+ private:
+ inline bool ShouldGenerateMovprfx(const ZRegister& dst,
+ const ZRegister& src) {
+ VIXL_ASSERT(AreSameLaneSize(dst, src));
+ return !dst.Aliases(src);
+ }
+
+ inline bool ShouldGenerateMovprfx(const ZRegister& dst,
+ const PRegister& pg,
+ const ZRegister& src) {
+ VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing());
+ // We need to emit movprfx in two cases:
+ // 1. To give a predicated merging unary instruction zeroing predication.
+ // 2. To make destructive instructions constructive.
+ //
+ // There are no predicated zeroing instructions that can take movprfx, so we
+ // will never generate an unnecessary movprfx with this logic.
+ return pg.IsZeroing() || ShouldGenerateMovprfx(dst, src);
+ }
+};
+
+
enum BranchType {
// Copies of architectural conditions.
// The associated conditions can be used in place of those, the code will
@@ -566,7 +616,19 @@ enum BranchType {
kBranchTypeFirstCondition = eq,
kBranchTypeLastCondition = nv,
kBranchTypeFirstUsingReg = reg_zero,
- kBranchTypeFirstUsingBit = reg_bit_clear
+ kBranchTypeFirstUsingBit = reg_bit_clear,
+
+ // SVE branch conditions.
+ integer_none = eq,
+ integer_any = ne,
+ integer_nlast = cs,
+ integer_last = cc,
+ integer_first = mi,
+ integer_nfrst = pl,
+ integer_pmore = hi,
+ integer_plast = ls,
+ integer_tcont = ge,
+ integer_tstop = lt
};
@@ -587,6 +649,18 @@ enum PreShiftImmMode {
kAnyShift // Allow any pre-shift.
};
+enum FPMacroNaNPropagationOption {
+ // The default option. This generates a run-time error in macros that respect
+ // this option.
+ NoFPMacroNaNPropagationSelected,
+ // For example, Fmin(result, NaN(a), NaN(b)) always selects NaN(a) if NaN(a)
+ // and NaN(b) are both quiet, or both are signalling, at the cost of extra
+ // code generation in some cases.
+ StrictNaNPropagation,
+ // For example, Fmin(result, NaN(a), NaN(b)) may select either NaN, using the
+ // fewest instructions.
+ FastNaNPropagation
+};
class MacroAssembler : public Assembler, public MacroAssemblerInterface {
public:
@@ -946,6 +1020,20 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
void Claim(const Operand& size);
void Drop(const Operand& size);
+ // As above, but for multiples of the SVE vector length.
+ void ClaimVL(int64_t multiplier) {
+ // We never need to worry about sp alignment because the VL is always a
+ // multiple of 16.
+ VIXL_STATIC_ASSERT((kZRegMinSizeInBytes % 16) == 0);
+ VIXL_ASSERT(multiplier >= 0);
+ Addvl(sp, sp, -multiplier);
+ }
+ void DropVL(int64_t multiplier) {
+ VIXL_STATIC_ASSERT((kZRegMinSizeInBytes % 16) == 0);
+ VIXL_ASSERT(multiplier >= 0);
+ Addvl(sp, sp, multiplier);
+ }
+
// Preserve the callee-saved registers (as defined by AAPCS64).
//
// Higher-numbered registers are pushed before lower-numbered registers, and
@@ -1489,13 +1577,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
void Fmov(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
- // Only emit an instruction if vd and vn are different, and they are both D
- // registers. fmov(s0, s0) is not a no-op because it clears the top word of
- // d0. Technically, fmov(d0, d0) is not a no-op either because it clears
- // the top of q0, but VRegister does not currently support Q registers.
- if (!vd.Is(vn) || !vd.Is64Bits()) {
- fmov(vd, vn);
- }
+ // TODO: Use DiscardMoveMode to allow this move to be elided if vd.Is(vn).
+ fmov(vd, vn);
}
void Fmov(const VRegister& vd, const Register& rn) {
VIXL_ASSERT(allow_macro_instructions_);
@@ -1503,12 +1586,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
SingleEmissionCheckScope guard(this);
fmov(vd, rn);
}
- void Fmov(const VRegister& vd, const XRegister& xn) {
- Fmov(vd, Register(xn));
- }
- void Fmov(const VRegister& vd, const WRegister& wn) {
- Fmov(vd, Register(wn));
- }
void Fmov(const VRegister& vd, int index, const Register& rn) {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
@@ -2970,6 +3047,43 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
NEON_2VREG_SHIFT_LONG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
#undef DEFINE_MACRO_ASM_FUNC
+// SVE 3 vector register instructions.
+#define SVE_3VREG_COMMUTATIVE_MACRO_LIST(V) \
+ V(add, Add) \
+ V(and_, And) \
+ V(bic, Bic) \
+ V(eor, Eor) \
+ V(mul, Mul) \
+ V(orr, Orr) \
+ V(sabd, Sabd) \
+ V(smax, Smax) \
+ V(smulh, Smulh) \
+ V(smin, Smin) \
+ V(uabd, Uabd) \
+ V(umax, Umax) \
+ V(umin, Umin) \
+ V(umulh, Umulh)
+
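+// The operations above are commutative, so if zd aliases zm the operands can
+// simply be swapped to use the destructive form; otherwise movprfx makes the
+// instruction effectively constructive.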
+#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const ZRegister& zd, \
+ const PRegisterM& pg, \
+ const ZRegister& zn, \
+ const ZRegister& zm) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ if (zd.Aliases(zn)) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(zd, pg, zd, zm); \
+ } else if (zd.Aliases(zm)) { \
+ SingleEmissionCheckScope guard(this); \
+ ASM(zd, pg, zd, zn); \
+ } else { \
+ MovprfxHelperScope guard(this, zd, pg, zn); \
+ ASM(zd, pg, zd, zm); \
+ } \
+ }
+ SVE_3VREG_COMMUTATIVE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+#undef DEFINE_MACRO_ASM_FUNC
+
void Bic(const VRegister& vd, const int imm8, const int left_shift = 0) {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
@@ -3357,6 +3471,2901 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
crc32cx(rd, rn, rm);
}
+ // Scalable Vector Extensions.
+ void Abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ abs(zd, pg, zn);
+ }
+ void Add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ add(zd, zn, zm);
+ }
+ void Add(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ AddSubHelper(kAddImmediate, zd, zn, imm);
+ }
+ void Addpl(const Register& xd, const Register& xn, int64_t multiplier);
+ void Addvl(const Register& xd, const Register& xn, int64_t multiplier);
+ // Note that unlike the core ISA, SVE's `adr` is not PC-relative.
+ void Adr(const ZRegister& zd, const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ adr(zd, addr);
+ }
+ void And(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ and_(pd, pg, pn, pm);
+ }
+ void And(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (IsImmLogical(imm, zd.GetLaneSizeInBits())) {
+ and_(zd, zn, imm);
+ } else {
+ // TODO: Synthesise the immediate once 'Mov' is implemented.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void And(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ SingleEmissionCheckScope guard(this);
+ and_(zd.VnD(), zn.VnD(), zm.VnD());
+ }
+ void Ands(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ands(pd, pg, pn, pm);
+ }
+ void Andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ andv(vd, pg, zn);
+ }
+ void Asr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ asr(zd, pg, zd, shift);
+ }
+ void Asr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Asr(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ asr(zd, zn, shift);
+ }
+ void Asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ asr(zd, zn, zm);
+ }
+ void Asrd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ asrd(zd, pg, zd, shift);
+ }
+ void Bic(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ bic(pd, pg, pn, pm);
+ }
+ void Bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ SingleEmissionCheckScope guard(this);
+ bic(zd.VnD(), zn.VnD(), zm.VnD());
+ }
+ void Bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (IsImmLogical(imm, zd.GetLaneSizeInBits())) {
+ bic(zd, zn, imm);
+ } else {
+ // TODO: Synthesise the immediate once 'Mov' is implemented.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void Bics(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ bics(pd, pg, pn, pm);
+ }
+ void Brka(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ brka(pd, pg, pn);
+ }
+ void Brkas(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ brkas(pd, pg, pn);
+ }
+ void Brkb(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ brkb(pd, pg, pn);
+ }
+ void Brkbs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ brkbs(pd, pg, pn);
+ }
+ void Brkn(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+    // The last operand of `brkn` must be the same register as the
+    // destination, so copy `pm` into `pd` first when they differ.
+    if (!pd.Aliases(pm)) {
+ Mov(pd, pm);
+ }
+ SingleEmissionCheckScope guard(this);
+ brkn(pd, pg, pn, pd);
+ }
+ void Brkns(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (!pd.Aliases(pm)) {
+ Mov(pd, pm);
+ }
+ SingleEmissionCheckScope guard(this);
+ brkns(pd, pg, pn, pd);
+ }
+ void Brkpa(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ brkpa(pd, pg, pn, pm);
+ }
+ void Brkpas(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ brkpas(pd, pg, pn, pm);
+ }
+ void Brkpb(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ brkpb(pd, pg, pn, pm);
+ }
+ void Brkpbs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ brkpbs(pd, pg, pn, pm);
+ }
+ void Clasta(const Register& rd,
+ const PRegister& pg,
+ const Register& rn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ clasta(rd, pg, rn, zm);
+ }
+ void Clasta(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ clasta(vd, pg, vn, zm);
+ }
+ void Clasta(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Clastb(const Register& rd,
+ const PRegister& pg,
+ const Register& rn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ clastb(rd, pg, rn, zm);
+ }
+ void Clastb(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ clastb(vd, pg, vn, zm);
+ }
+ void Clastb(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cls(zd, pg, zn);
+ }
+ void Clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ clz(zd, pg, zn);
+ }
+ void Cmpeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmpeq(pd, pg, zn, zm);
+ }
+ void Cmpeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmpeq(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(eq, pd, pg, zn, imm);
+ }
+ }
+ void Cmpge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmpge(pd, pg, zn, zm);
+ }
+ void Cmpge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmpge(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(ge, pd, pg, zn, imm);
+ }
+ }
+ void Cmpgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmpgt(pd, pg, zn, zm);
+ }
+ void Cmpgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmpgt(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(gt, pd, pg, zn, imm);
+ }
+ }
+ void Cmphi(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmphi(pd, pg, zn, zm);
+ }
+ void Cmphi(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (imm.IsUintN(7)) {
+ SingleEmissionCheckScope guard(this);
+ cmphi(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7)));
+ } else {
+ CompareHelper(hi, pd, pg, zn, imm);
+ }
+ }
+ void Cmphs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmphs(pd, pg, zn, zm);
+ }
+ void Cmphs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    if (imm.IsUintN(7)) {
+ SingleEmissionCheckScope guard(this);
+ cmphs(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7)));
+ } else {
+ CompareHelper(hs, pd, pg, zn, imm);
+ }
+ }
+ void Cmple(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmple(pd, pg, zn, zm);
+ }
+ void Cmple(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmple(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(le, pd, pg, zn, imm);
+ }
+ }
+ void Cmplo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmplo(pd, pg, zn, zm);
+ }
+ void Cmplo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    if (imm.IsUintN(7)) {
+ SingleEmissionCheckScope guard(this);
+ cmplo(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7)));
+ } else {
+ CompareHelper(lo, pd, pg, zn, imm);
+ }
+ }
+ void Cmpls(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmpls(pd, pg, zn, zm);
+ }
+ void Cmpls(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    if (imm.IsUintN(7)) {
+ SingleEmissionCheckScope guard(this);
+ cmpls(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7)));
+ } else {
+ CompareHelper(ls, pd, pg, zn, imm);
+ }
+ }
+ void Cmplt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmplt(pd, pg, zn, zm);
+ }
+ void Cmplt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmplt(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(lt, pd, pg, zn, imm);
+ }
+ }
+ void Cmpne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmpne(pd, pg, zn, zm);
+ }
+ void Cmpne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmpne(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(ne, pd, pg, zn, imm);
+ }
+ }
+ void Cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cnot(zd, pg, zn);
+ }
+ void Cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cnt(zd, pg, zn);
+ }
+ void Cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cntb(rd, pattern, multiplier);
+ }
+ void Cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cntd(rd, pattern, multiplier);
+ }
+ void Cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cnth(rd, pattern, multiplier);
+ }
+ void Cntp(const Register& rd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ // The `cntp` instruction architecturally takes an X register, but the
+ // result will always be in the range [0, kPRegMaxSize] (and therefore
+ // always fits in a W register), so we can accept a W-sized rd here.
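+    // For example, `Cntp(w0, p0, p1.VnB())` emits `cntp x0, p0, p1.b`.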
+ cntp(rd.X(), pg, pn);
+ }
+ void Cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cntw(rd, pattern, multiplier);
+ }
+ void Compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ compact(zd, pg, zn);
+ }
+ void Cpy(const ZRegister& zd, const PRegister& pg, IntegerOperand imm);
+ void Cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cpy(zd, pg, rn);
+ }
+ void Cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cpy(zd, pg, vn);
+ }
+ void Ctermeq(const Register& rn, const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ctermeq(rn, rm);
+ }
+ void Ctermne(const Register& rn, const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ctermne(rn, rm);
+ }
+ void Decb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ decb(rdn, pattern, multiplier);
+ }
+ void Decd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ decd(rdn, pattern, multiplier);
+ }
+ void Decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ decd(zdn, pattern, multiplier);
+ }
+ void Dech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ dech(rdn, pattern, multiplier);
+ }
+ void Dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ dech(zdn, pattern, multiplier);
+ }
+ void Decp(const Register& rdn, const PRegisterWithLaneSize& pg) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ decp(rdn, pg);
+ }
+ void Decp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(AreSameFormat(zd, zn));
+ // `decp` writes every lane, so use an unpredicated movprfx.
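+    // For example, `Decp(z0.VnD(), p0, z1.VnD())` is equivalent to
+    // `movprfx z0, z1` followed by `decp z0.d, p0`.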
+ MovprfxHelperScope guard(this, zd, zn);
+ decp(zd, pg);
+ }
+ void Decp(const ZRegister& zdn, const PRegister& pg) { Decp(zdn, pg, zdn); }
+ void Decw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ decw(rdn, pattern, multiplier);
+ }
+ void Decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ decw(zdn, pattern, multiplier);
+ }
+ void Dup(const ZRegister& zd, const Register& xn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ dup(zd, xn);
+ }
+ void Dup(const ZRegister& zd, const ZRegister& zn, int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ dup(zd, zn, index);
+ }
+ void Dup(const ZRegister& zd, IntegerOperand imm);
+ void Eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (IsImmLogical(imm, zd.GetLaneSizeInBits())) {
+ eon(zd, zn, imm);
+ } else {
+ // TODO: Synthesise the immediate once 'Mov' is implemented.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void Eor(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ eor(pd, pg, pn, pm);
+ }
+ void Eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (IsImmLogical(imm, zd.GetLaneSizeInBits())) {
+ eor(zd, zn, imm);
+ } else {
+ // TODO: Synthesise the immediate once 'Mov' is implemented.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void Eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ SingleEmissionCheckScope guard(this);
+ eor(zd.VnD(), zn.VnD(), zm.VnD());
+ }
+ void Eors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ eors(pd, pg, pn, pm);
+ }
+ void Eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ eorv(vd, pg, zn);
+ }
+ void Ext(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ unsigned offset);
+ void Fabd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option);
+ void Fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fabs(zd, pg, zn);
+ }
+ void Facge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ facge(pd, pg, zn, zm);
+ }
+ void Facgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ facgt(pd, pg, zn, zm);
+ }
+  // `Facle` and `Faclt` have no encodings of their own; they reverse the
+  // operands of `facge` and `facgt` respectively.
+  void Facle(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ facge(pd, pg, zm, zn);
+ }
+ void Faclt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ facgt(pd, pg, zm, zn);
+ }
+ void Fadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fadd(zd, pg, zd, imm);
+ }
+ void Fadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option);
+ void Fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fadd(zd, zn, zm);
+ }
+ void Fadda(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fadda(vd, pg, vn, zm);
+ }
+ void Faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ faddv(vd, pg, zn);
+ }
+ void Fcadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+ void Fcmeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (zero == 0.0) {
+ fcmeq(pd, pg, zn, zero);
+ } else {
+ // TODO: Synthesise other immediates.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void Fcmeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcmeq(pd, pg, zn, zm);
+ }
+ void Fcmge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (zero == 0.0) {
+ fcmge(pd, pg, zn, zero);
+ } else {
+ // TODO: Synthesise other immediates.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void Fcmge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcmge(pd, pg, zn, zm);
+ }
+ void Fcmgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (zero == 0.0) {
+ fcmgt(pd, pg, zn, zero);
+ } else {
+ // TODO: Synthesise other immediates.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void Fcmgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcmgt(pd, pg, zn, zm);
+ }
+ void Fcmla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zda, pg, zda);
+ fcmla(zda, pg, zn, zm, rot);
+ }
+ void Fcmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcmla(zda, zn, zm, index, rot);
+ }
+ void Fcmle(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (zero == 0.0) {
+ fcmle(pd, pg, zn, zero);
+ } else {
+ // TODO: Synthesise other immediates.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void Fcmle(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcmge(pd, pg, zm, zn);
+ }
+ void Fcmlt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (zero == 0.0) {
+ fcmlt(pd, pg, zn, zero);
+ } else {
+ // TODO: Synthesise other immediates.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void Fcmlt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcmgt(pd, pg, zm, zn);
+ }
+ void Fcmne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (zero == 0.0) {
+ fcmne(pd, pg, zn, zero);
+ } else {
+ // TODO: Synthesise other immediates.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void Fcmne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcmne(pd, pg, zn, zm);
+ }
+ void Fcmuo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcmuo(pd, pg, zn, zm);
+ }
+ void Fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
+ void Fcpy(const ZRegister& zd, const PRegisterM& pg, float imm);
+ void Fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm);
+ void Fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcvt(zd, pg, zn);
+ }
+ void Fcvt(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+    // The element type in this predicated movprfx is determined by the larger
+    // of the source and destination lane sizes.
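+    // For example, for `Fcvt(z0.VnD(), p0.Zeroing(), z1.VnS())` the movprfx
+    // operates on D-sized lanes.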
+ int lane_size = std::max(zd.GetLaneSizeInBits(), zn.GetLaneSizeInBits());
+ MovprfxHelperScope guard(this,
+ zd.WithLaneSize(lane_size),
+ pg,
+ zn.WithLaneSize(lane_size));
+ fcvt(zd, pg.Merging(), zn);
+ }
+ void Fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcvtzs(zd, pg, zn);
+ }
+ void Fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcvtzu(zd, pg, zn);
+ }
+ void Fdiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fdup(const ZRegister& zd, double imm);
+ void Fdup(const ZRegister& zd, float imm);
+ void Fdup(const ZRegister& zd, Float16 imm);
+ void Fexpa(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fexpa(zd, zn);
+ }
+ void Fmad(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fmad(zdn, pg, zm, za);
+ }
+ void Fmax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fmax(zd, pg, zd, imm);
+ }
+ void Fmax(
+ const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+ void Fmaxnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fmaxnm(zd, pg, zd, imm);
+ }
+ void Fmaxnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option);
+ void Fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fmaxnmv(vd, pg, zn);
+ }
+ void Fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fmaxv(vd, pg, zn);
+ }
+ void Fmin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fmin(zd, pg, zd, imm);
+ }
+ void Fmin(
+ const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+ void Fminnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fminnm(zd, pg, zd, imm);
+ }
+ void Fminnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option);
+ void Fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fminnmv(vd, pg, zn);
+ }
+ void Fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fminv(vd, pg, zn);
+ }
+ // zd = za + (zn * zm)
+ void Fmla(
+ const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+ void Fmla(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ // zd = za - (zn * zm)
+ void Fmls(
+ const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+ void Fmls(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Fmov(const ZRegister& zd, double imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ Fdup(zd, imm);
+ }
+ void Fmov(const ZRegister& zd, float imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ Fdup(zd, imm);
+ }
+ void Fmov(const ZRegister& zd, Float16 imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ Fdup(zd, imm);
+ }
+ void Fmov(const ZRegister& zd, const PRegisterM& pg, double imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ Fcpy(zd, pg, imm);
+ }
+ void Fmov(const ZRegister& zd, const PRegisterM& pg, float imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ Fcpy(zd, pg, imm);
+ }
+ void Fmov(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ Fcpy(zd, pg, imm);
+ }
+ void Fmsb(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fmsb(zdn, pg, zm, za);
+ }
+ void Fmul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fmul(zd, pg, zd, imm);
+ }
+ void Fmul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option);
+ void Fmul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ unsigned index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fmul(zd, zn, zm, index);
+ }
+ void Fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fmul(zd, zn, zm);
+ }
+ void Fmulx(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option);
+ void Fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fneg(zd, pg, zn);
+ }
+ void Fnmla(
+ const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+ void Fnmls(
+ const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+ void Frecpe(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frecpe(zd, zn);
+ }
+ void Frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frecps(zd, zn, zm);
+ }
+ void Frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frecpx(zd, pg, zn);
+ }
+ void Frecpx(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ frecpx(zd, pg.Merging(), zn);
+ }
+ void Frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frinta(zd, pg, zn);
+ }
+ void Frinta(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ frinta(zd, pg.Merging(), zn);
+ }
+ void Frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frinti(zd, pg, zn);
+ }
+ void Frinti(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ frinti(zd, pg.Merging(), zn);
+ }
+ void Frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frintm(zd, pg, zn);
+ }
+ void Frintm(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ frintm(zd, pg.Merging(), zn);
+ }
+ void Frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frintn(zd, pg, zn);
+ }
+ void Frintn(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ frintn(zd, pg.Merging(), zn);
+ }
+ void Frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frintp(zd, pg, zn);
+ }
+ void Frintp(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ frintp(zd, pg.Merging(), zn);
+ }
+ void Frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frintx(zd, pg, zn);
+ }
+ void Frintx(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ frintx(zd, pg.Merging(), zn);
+ }
+ void Frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frintz(zd, pg, zn);
+ }
+ void Frintz(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ frintz(zd, pg.Merging(), zn);
+ }
+ void Frsqrte(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frsqrte(zd, zn);
+ }
+ void Frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ frsqrts(zd, zn, zm);
+ }
+ void Fscale(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fsqrt(zd, pg, zn);
+ }
+ void Fsqrt(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fsqrt(zd, pg.Merging(), zn);
+ }
+ void Fsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fsub(zd, pg, zd, imm);
+ }
+  // This overload computes (imm - zn), using `fsubr`.
+  void Fsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ double imm,
+ const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fsubr(zd, pg, zd, imm);
+ }
+ void Fsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fsub(zd, zn, zm);
+ }
+ void Ftmad(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int imm3);
+ void Ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ftsmul(zd, zn, zm);
+ }
+ void Ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ftssel(zd, zn, zm);
+ }
+ void Incb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ incb(rdn, pattern, multiplier);
+ }
+ void Incd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ incd(rdn, pattern, multiplier);
+ }
+ void Incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ incd(zdn, pattern, multiplier);
+ }
+ void Inch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ inch(rdn, pattern, multiplier);
+ }
+ void Inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ inch(zdn, pattern, multiplier);
+ }
+ void Incp(const Register& rdn, const PRegisterWithLaneSize& pg) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ incp(rdn, pg);
+ }
+ void Incp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(AreSameFormat(zd, zn));
+ // `incp` writes every lane, so use an unpredicated movprfx.
+ MovprfxHelperScope guard(this, zd, zn);
+ incp(zd, pg);
+ }
+ void Incp(const ZRegister& zdn, const PRegister& pg) { Incp(zdn, pg, zdn); }
+ void Incw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ incw(rdn, pattern, multiplier);
+ }
+ void Incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ incw(zdn, pattern, multiplier);
+ }
+ void Index(const ZRegister& zd, const Operand& start, const Operand& step);
+ void Insr(const ZRegister& zdn, const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ insr(zdn, rm);
+ }
+ void Insr(const ZRegister& zdn, const VRegister& vm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ insr(zdn, vm);
+ }
+ void Insr(const ZRegister& zdn, IntegerOperand imm);
+ void Lasta(const Register& rd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ lasta(rd, pg, zn);
+ }
+ void Lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ lasta(vd, pg, zn);
+ }
+ void Lastb(const Register& rd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ lastb(rd, pg, zn);
+ }
+ void Lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ lastb(vd, pg, zn);
+ }
+ void Ld1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1rb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadBroadcastImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ld1rb,
+ kBRegSizeInBytes);
+ }
+ void Ld1rh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadBroadcastImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ld1rh,
+ kHRegSizeInBytes);
+ }
+ void Ld1rw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadBroadcastImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ld1rw,
+ kSRegSizeInBytes);
+ }
+ void Ld1rd(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadBroadcastImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ld1rd,
+ kDRegSizeInBytes);
+ }
+ void Ld1rqb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1rqd(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1rqh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1rqw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1rsb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadBroadcastImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ld1rsb,
+ kBRegSizeInBytes);
+ }
+ void Ld1rsh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadBroadcastImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ld1rsh,
+ kHRegSizeInBytes);
+ }
+ void Ld1rsw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadBroadcastImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ld1rsw,
+ kSRegSizeInBytes);
+ }
+ void Ld1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld2b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld2b(zt1, zt2, pg, addr);
+ }
+ void Ld2h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld2h(zt1, zt2, pg, addr);
+ }
+ void Ld2w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld2w(zt1, zt2, pg, addr);
+ }
+ void Ld2d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld2d(zt1, zt2, pg, addr);
+ }
+ void Ld3b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld3b(zt1, zt2, zt3, pg, addr);
+ }
+ void Ld3h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld3h(zt1, zt2, zt3, pg, addr);
+ }
+ void Ld3w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld3w(zt1, zt2, zt3, pg, addr);
+ }
+ void Ld3d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld3d(zt1, zt2, zt3, pg, addr);
+ }
+ void Ld4b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld4b(zt1, zt2, zt3, zt4, pg, addr);
+ }
+ void Ld4h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld4h(zt1, zt2, zt3, zt4, pg, addr);
+ }
+ void Ld4w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld4w(zt1, zt2, zt3, zt4, pg, addr);
+ }
+ void Ld4d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ld4d(zt1, zt2, zt3, zt4, pg, addr);
+ }
+ void Ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1b(zt, pg, xn, zm);
+ }
+ void Ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1b(zt, pg, zn, imm5);
+ }
+ void Ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1d(zt, pg, xn, zm);
+ }
+ void Ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1d(zt, pg, zn, imm5);
+ }
+ void Ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1h(zt, pg, xn, zm);
+ }
+ void Ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1h(zt, pg, zn, imm5);
+ }
+ void Ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1sb(zt, pg, xn, zm);
+ }
+ void Ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1sb(zt, pg, zn, imm5);
+ }
+ void Ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1sh(zt, pg, xn, zm);
+ }
+ void Ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1sh(zt, pg, zn, imm5);
+ }
+ void Ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1sw(zt, pg, xn, zm);
+ }
+ void Ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1sw(zt, pg, zn, imm5);
+ }
+ void Ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1w(zt, pg, xn, zm);
+ }
+ void Ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldff1w(zt, pg, zn, imm5);
+ }
+ void Ldnf1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldnf1b(zt, pg, addr);
+ }
+ void Ldnf1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldnf1d(zt, pg, addr);
+ }
+ void Ldnf1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldnf1h(zt, pg, addr);
+ }
+ void Ldnf1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldnf1sb(zt, pg, addr);
+ }
+ void Ldnf1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldnf1sh(zt, pg, addr);
+ }
+ void Ldnf1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldnf1sw(zt, pg, addr);
+ }
+ void Ldnf1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldnf1w(zt, pg, addr);
+ }
+ void Ldnt1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ldnt1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ldnt1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ldnt1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ldr(const CPURegister& rt, const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(rt, addr, &MacroAssembler::ldr);
+ }
+ void Lsl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ lsl(zd, pg, zd, shift);
+ }
+ void Lsl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Lsl(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ lsl(zd, zn, shift);
+ }
+ void Lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ lsl(zd, zn, zm);
+ }
+ void Lsr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ lsr(zd, pg, zd, shift);
+ }
+ void Lsr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Lsr(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ lsr(zd, zn, shift);
+ }
+ void Lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ lsr(zd, zn, zm);
+ }
+ void Mov(const PRegister& pd, const PRegister& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mov(pd.VnB(), pn.VnB());
+ }
+ void Mov(const PRegisterWithLaneSize& pd,
+ const PRegisterM& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mov(pd, pg, pn);
+ }
+ void Mov(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mov(pd, pg, pn);
+ }
+ void Mov(const ZRegister& zd, const Register& xn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mov(zd, xn);
+ }
+
+ void Mov(const ZRegister& zd, const VRegister& vn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mov(zd, vn);
+ }
+
+ void Mov(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mov(zd, zn);
+ }
+ void Mov(const ZRegister& zd, const ZRegister& zn, unsigned index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mov(zd, zn, index);
+ }
+ void Mov(const ZRegister& zd, const PRegister& pg, IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ Cpy(zd, pg, imm);
+ }
+ // TODO: support zeroing predicated moves using movprfx.
+ void Mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mov(zd, pg, rn);
+ }
+ void Mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mov(zd, pg, vn);
+ }
+ void Mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mov(zd, pg, zn);
+ }
+ void Mov(const ZRegister& zd, IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ Dup(zd, imm);
+ }
+ void Movs(const PRegister& pd, const PRegister& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ movs(pd, pn);
+ }
+ void Movs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ movs(pd, pg, pn);
+ }
+ // zd = za + (zn * zm)
+ void Mla(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ // zd = za - (zn * zm)
+ void Mls(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Mul(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm);
+ void Nand(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ nand(pd, pg, pn, pm);
+ }
+ void Nands(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ nands(pd, pg, pn, pm);
+ }
+ // There is no instruction with this form, but we can implement it using
+ // `subr`.
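+  // For example, `Neg(z0.VnS(), z1.VnS())` is equivalent to
+  // `movprfx z0, z1` followed by `subr z0.s, z0.s, #0`.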
+ void Neg(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, zn);
+ subr(zd, zd, 0);
+ }
+ void Neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ neg(zd, pg, zn);
+ }
+ void Nor(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ nor(pd, pg, pn, pm);
+ }
+ void Nors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ nors(pd, pg, pn, pm);
+ }
+ void Not(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ not_(pd, pg, pn);
+ }
+ void Not(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ not_(zd, pg, zn);
+ }
+ void Nots(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ nots(pd, pg, pn);
+ }
+ void Orn(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ orn(pd, pg, pn, pm);
+ }
+ void Orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (IsImmLogical(imm, zd.GetLaneSizeInBits())) {
+ orn(zd, zn, imm);
+ } else {
+ // TODO: Synthesise the immediate once 'Mov' is implemented.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void Orns(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ orns(pd, pg, pn, pm);
+ }
+ void Orr(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ orr(pd, pg, pn, pm);
+ }
+ void Orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ if (IsImmLogical(imm, zd.GetLaneSizeInBits())) {
+ orr(zd, zn, imm);
+ } else {
+ // TODO: Synthesise the immediate once 'Mov' is implemented.
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+ void Orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ SingleEmissionCheckScope guard(this);
+ orr(zd.VnD(), zn.VnD(), zm.VnD());
+ }
+ void Orrs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ orrs(pd, pg, pn, pm);
+ }
+ void Orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ orv(vd, pg, zn);
+ }
+ void Pfalse(const PRegister& pd) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(pd.IsUnqualified());
+ SingleEmissionCheckScope guard(this);
+ // No matter what the lane size is, overall this operation just writes zeros
+ // throughout the register.
+ pfalse(pd.VnB());
+ }
+ void Pfirst(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn);
+ void Pnext(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn);
+ void Prfb(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ prfb(prfop, pg, addr);
+ }
+ void Prfh(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ prfh(prfop, pg, addr);
+ }
+ void Prfw(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ prfw(prfop, pg, addr);
+ }
+ void Prfd(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ prfd(prfop, pg, addr);
+ }
+ void Ptest(const PRegister& pg, const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ptest(pg, pn);
+ }
+ void Ptrue(const PRegisterWithLaneSize& pd,
+ SVEPredicateConstraint pattern,
+ FlagsUpdate s);
+ void Ptrue(const PRegisterWithLaneSize& pd,
+ SVEPredicateConstraint pattern = SVE_ALL) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ptrue(pd, pattern);
+ }
+ void Ptrues(const PRegisterWithLaneSize& pd,
+ SVEPredicateConstraint pattern = SVE_ALL) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ptrues(pd, pattern);
+ }
+ void Punpkhi(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ punpkhi(pd, pn);
+ }
+ void Punpklo(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ punpklo(pd, pn);
+ }
+ void Rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ rbit(zd, pg, zn);
+ }
+ void Rdffr(const PRegister& pd) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ // Although this is essentially just a move, it writes every bit and so can
+ // only support b-sized lanes because other lane sizes would implicitly clear
+ // bits in `pd`.
+ VIXL_ASSERT(!pd.HasLaneSize() || pd.IsLaneSizeB());
+ VIXL_ASSERT(pd.IsUnqualified());
+ SingleEmissionCheckScope guard(this);
+ rdffr(pd.VnB());
+ }
+ void Rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ rdffr(pd, pg);
+ }
+ void Rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ rdffrs(pd, pg);
+ }
+ // Note that there is no `rdpl` instruction, but this macro emulates it (for
+ // symmetry with `Rdvl`).
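+ // For example (a sketch with a placeholder register), `Rdpl(x0, 3)` becomes
+ // `Addpl(x0, xzr, 3)`, which in turn typically materialises a zero and then
+ // adds: `mov x0, #0; addpl x0, x0, #3`.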
+ void Rdpl(const Register& xd, int64_t multiplier) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ Addpl(xd, xzr, multiplier);
+ }
+ void Rdvl(const Register& xd, int64_t multiplier) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ Addvl(xd, xzr, multiplier);
+ }
+ void Rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ rev(pd, pn);
+ }
+ void Rev(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ rev(zd, zn);
+ }
+ void Revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ revb(zd, pg, zn);
+ }
+ void Revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ revh(zd, pg, zn);
+ }
+ void Revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ revw(zd, pg, zn);
+ }
+ void Saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ saddv(dd, pg, zn);
+ }
+ void Scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ scvtf(zd, pg, zn);
+ }
+ void Sdiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sdot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sdot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Sel(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sel(pd, pg, pn, pm);
+ }
+ void Sel(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sel(zd, pg, zn, zm);
+ }
+ void Setffr() {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ setffr();
+ }
+ void Smax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm);
+ void Smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ smaxv(vd, pg, zn);
+ }
+ void Smin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm);
+ void Sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sminv(vd, pg, zn);
+ }
+ void Splice(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqadd(zd, zn, zm);
+ }
+ void Sqadd(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(imm.IsUint8() ||
+ (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0)));
+ MovprfxHelperScope guard(this, zd, zn);
+ sqadd(zd, zd, imm.AsUint16());
+ }
+ void Sqdecb(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdecb(xd, wn, pattern, multiplier);
+ }
+ void Sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdecb(rdn, pattern, multiplier);
+ }
+ void Sqdecd(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdecd(xd, wn, pattern, multiplier);
+ }
+ void Sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdecd(rdn, pattern, multiplier);
+ }
+ void Sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdecd(zdn, pattern, multiplier);
+ }
+ void Sqdech(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdech(xd, wn, pattern, multiplier);
+ }
+ void Sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdech(rdn, pattern, multiplier);
+ }
+ void Sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdech(zdn, pattern, multiplier);
+ }
+ void Sqdecp(const Register& xdn,
+ const PRegisterWithLaneSize& pg,
+ const Register& wdn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdecp(xdn, pg, wdn);
+ }
+ void Sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdecp(xdn, pg);
+ }
+ void Sqdecp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(AreSameFormat(zd, zn));
+ // `sqdecp` writes every lane, so use an unpredicated movprfx.
+ MovprfxHelperScope guard(this, zd, zn);
+ sqdecp(zd, pg);
+ }
+ void Sqdecp(const ZRegister& zdn, const PRegister& pg) {
+ Sqdecp(zdn, pg, zdn);
+ }
+ void Sqdecw(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdecw(xd, wn, pattern, multiplier);
+ }
+ void Sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdecw(rdn, pattern, multiplier);
+ }
+ void Sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdecw(zdn, pattern, multiplier);
+ }
+ void Sqincb(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqincb(xd, wn, pattern, multiplier);
+ }
+ void Sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqincb(rdn, pattern, multiplier);
+ }
+ void Sqincd(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqincd(xd, wn, pattern, multiplier);
+ }
+ void Sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqincd(rdn, pattern, multiplier);
+ }
+ void Sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqincd(zdn, pattern, multiplier);
+ }
+ void Sqinch(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqinch(xd, wn, pattern, multiplier);
+ }
+ void Sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqinch(rdn, pattern, multiplier);
+ }
+ void Sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqinch(zdn, pattern, multiplier);
+ }
+ void Sqincp(const Register& xdn,
+ const PRegisterWithLaneSize& pg,
+ const Register& wdn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqincp(xdn, pg, wdn);
+ }
+ void Sqincp(const Register& xdn, const PRegisterWithLaneSize& pg) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqincp(xdn, pg);
+ }
+ void Sqincp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(AreSameFormat(zd, zn));
+ // `sqincp` writes every lane, so use an unpredicated movprfx.
+ MovprfxHelperScope guard(this, zd, zn);
+ sqincp(zd, pg);
+ }
+ void Sqincp(const ZRegister& zdn, const PRegister& pg) {
+ Sqincp(zdn, pg, zdn);
+ }
+ void Sqincw(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqincw(xd, wn, pattern, multiplier);
+ }
+ void Sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqincw(rdn, pattern, multiplier);
+ }
+ void Sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqincw(zdn, pattern, multiplier);
+ }
+ void Sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqsub(zd, zn, zm);
+ }
+ void Sqsub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(imm.IsUint8() ||
+ (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0)));
+ MovprfxHelperScope guard(this, zd, zn);
+ sqsub(zd, zd, imm.AsUint16());
+ }
+ void St1b(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+ void St1h(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+ void St1w(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+ void St1d(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+ void St2b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st2b(zt1, zt2, pg, addr);
+ }
+ void St2h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st2h(zt1, zt2, pg, addr);
+ }
+ void St2w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st2w(zt1, zt2, pg, addr);
+ }
+ void St2d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st2d(zt1, zt2, pg, addr);
+ }
+ void St3b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st3b(zt1, zt2, zt3, pg, addr);
+ }
+ void St3h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st3h(zt1, zt2, zt3, pg, addr);
+ }
+ void St3w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st3w(zt1, zt2, zt3, pg, addr);
+ }
+ void St3d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st3d(zt1, zt2, zt3, pg, addr);
+ }
+ void St4b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st4b(zt1, zt2, zt3, zt4, pg, addr);
+ }
+ void St4h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st4h(zt1, zt2, zt3, zt4, pg, addr);
+ }
+ void St4w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st4w(zt1, zt2, zt3, zt4, pg, addr);
+ }
+ void St4d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ st4d(zt1, zt2, zt3, zt4, pg, addr);
+ }
+ void Stnt1b(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+ void Stnt1d(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+ void Stnt1h(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+ void Stnt1w(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+ void Str(const CPURegister& rt, const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(rt, addr, &MacroAssembler::str);
+ }
+ void Sub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sub(zd, zn, zm);
+ }
+ void Sub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ AddSubHelper(kSubImmediate, zd, zn, imm);
+ }
+ void Sub(const ZRegister& zd, IntegerOperand imm, const ZRegister& zm);
+ void Sunpkhi(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sunpkhi(zd, zn);
+ }
+ void Sunpklo(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sunpklo(zd, zn);
+ }
+ void Sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sxtb(zd, pg, zn);
+ }
+ void Sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sxth(zd, pg, zn);
+ }
+ void Sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sxtw(zd, pg, zn);
+ }
+ void Tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ tbl(zd, zn, zm);
+ }
+ void Trn1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ trn1(pd, pn, pm);
+ }
+ void Trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ trn1(zd, zn, zm);
+ }
+ void Trn2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ trn2(pd, pn, pm);
+ }
+ void Trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ trn2(zd, zn, zm);
+ }
+ void Uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uaddv(dd, pg, zn);
+ }
+ void Ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ucvtf(zd, pg, zn);
+ }
+ void Udiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Udot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Udot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Umax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm);
+ void Umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ umaxv(vd, pg, zn);
+ }
+ void Umin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm);
+ void Uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uminv(vd, pg, zn);
+ }
+ void Uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqadd(zd, zn, zm);
+ }
+ void Uqadd(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(imm.IsUint8() ||
+ (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0)));
+ MovprfxHelperScope guard(this, zd, zn);
+ uqadd(zd, zd, imm.AsUint16());
+ }
+ void Uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqdecb(rdn, pattern, multiplier);
+ }
+ void Uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqdecd(rdn, pattern, multiplier);
+ }
+ void Uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqdecd(zdn, pattern, multiplier);
+ }
+ void Uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqdech(rdn, pattern, multiplier);
+ }
+ void Uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqdech(zdn, pattern, multiplier);
+ }
+ // The saturation is based on the size of `rn`. The result is zero-extended
+ // into `rd`, which must be at least as big.
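+ // For example (a sketch with placeholder registers), `Uqdecp(x0, p0.VnB(),
+ // w0)` emits `uqdecp w0, p0.b`: the decrement saturates at 32 bits, and
+ // writing the W register zero-extends the result into `x0`.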
+ void Uqdecp(const Register& rd,
+ const PRegisterWithLaneSize& pg,
+ const Register& rn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(rd.Aliases(rn));
+ VIXL_ASSERT(rd.GetSizeInBytes() >= rn.GetSizeInBytes());
+ SingleEmissionCheckScope guard(this);
+ if (rn.Is64Bits()) {
+ uqdecp(rd, pg);
+ } else {
+ // Convert <Xd> into <Wd>, to make this more consistent with Sqdecp.
+ uqdecp(rd.W(), pg);
+ }
+ }
+ void Uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg) {
+ Uqdecp(rdn, pg, rdn);
+ }
+ void Uqdecp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(AreSameFormat(zd, zn));
+ // `uqdecp` writes every lane, so use an unpredicated movprfx.
+ MovprfxHelperScope guard(this, zd, zn);
+ uqdecp(zd, pg);
+ }
+ void Uqdecp(const ZRegister& zdn, const PRegister& pg) {
+ Uqdecp(zdn, pg, zdn);
+ }
+ void Uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqdecw(rdn, pattern, multiplier);
+ }
+ void Uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqdecw(zdn, pattern, multiplier);
+ }
+ void Uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqincb(rdn, pattern, multiplier);
+ }
+ void Uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqincd(rdn, pattern, multiplier);
+ }
+ void Uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqincd(zdn, pattern, multiplier);
+ }
+ void Uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqinch(rdn, pattern, multiplier);
+ }
+ void Uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqinch(zdn, pattern, multiplier);
+ }
+ // The saturation is based on the size of `rn`. The result is zero-extended
+ // into `rd`, which must be at least as big.
+ void Uqincp(const Register& rd,
+ const PRegisterWithLaneSize& pg,
+ const Register& rn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(rd.Aliases(rn));
+ VIXL_ASSERT(rd.GetSizeInBytes() >= rn.GetSizeInBytes());
+ SingleEmissionCheckScope guard(this);
+ if (rn.Is64Bits()) {
+ uqincp(rd, pg);
+ } else {
+ // Convert <Xd> into <Wd>, to make this more consistent with Sqincp.
+ uqincp(rd.W(), pg);
+ }
+ }
+ void Uqincp(const Register& rdn, const PRegisterWithLaneSize& pg) {
+ Uqincp(rdn, pg, rdn);
+ }
+ void Uqincp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(AreSameFormat(zd, zn));
+ // `uqincp` writes every lane, so use an unpredicated movprfx.
+ MovprfxHelperScope guard(this, zd, zn);
+ uqincp(zd, pg);
+ }
+ void Uqincp(const ZRegister& zdn, const PRegister& pg) {
+ Uqincp(zdn, pg, zdn);
+ }
+ void Uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqincw(rdn, pattern, multiplier);
+ }
+ void Uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqincw(zdn, pattern, multiplier);
+ }
+ void Uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqsub(zd, zn, zm);
+ }
+ void Uqsub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(imm.IsUint8() ||
+ (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0)));
+ MovprfxHelperScope guard(this, zd, zn);
+ uqsub(zd, zd, imm.AsUint16());
+ }
+ void Uunpkhi(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uunpkhi(zd, zn);
+ }
+ void Uunpklo(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uunpklo(zd, zn);
+ }
+ void Uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uxtb(zd, pg, zn);
+ }
+ void Uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uxth(zd, pg, zn);
+ }
+ void Uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uxtw(zd, pg, zn);
+ }
+ void Uzp1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uzp1(pd, pn, pm);
+ }
+ void Uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uzp1(zd, zn, zm);
+ }
+ void Uzp2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uzp2(pd, pn, pm);
+ }
+ void Uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uzp2(zd, zn, zm);
+ }
+ void Whilele(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ whilele(pd, rn, rm);
+ }
+ void Whilelo(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ whilelo(pd, rn, rm);
+ }
+ void Whilels(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ whilels(pd, rn, rm);
+ }
+ void Whilelt(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ whilelt(pd, rn, rm);
+ }
+ void Wrffr(const PRegister& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ // Although this is essentially just a move, it writes every bit and so can
+ // only support b-sized lanes because other lane sizes would implicitly clear
+ // bits in `ffr`.
+ VIXL_ASSERT(!pn.HasLaneSize() || pn.IsLaneSizeB());
+ VIXL_ASSERT(pn.IsUnqualified());
+ SingleEmissionCheckScope guard(this);
+ wrffr(pn.VnB());
+ }
+ void Zip1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ zip1(pd, pn, pm);
+ }
+ void Zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ zip1(zd, zn, zm);
+ }
+ void Zip2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ zip2(pd, pn, pm);
+ }
+ void Zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ zip2(zd, zn, zm);
+ }
+
template <typename T>
Literal<T>* CreateLiteralDestroyedWithPool(T value) {
return new Literal<T>(value,
@@ -3480,11 +6489,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
return GetScratchRegisterList();
}
- CPURegList* GetScratchVRegisterList() { return &fptmp_list_; }
+ CPURegList* GetScratchVRegisterList() { return &v_tmp_list_; }
VIXL_DEPRECATED("GetScratchVRegisterList", CPURegList* FPTmpList()) {
return GetScratchVRegisterList();
}
+ CPURegList* GetScratchPRegisterList() { return &p_tmp_list_; }
+
// Get or set the current (most-deeply-nested) UseScratchRegisterScope.
void SetCurrentScratchRegisterScope(UseScratchRegisterScope* scope) {
current_scratch_scope_ = scope;
@@ -3548,16 +6559,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
// Will output the flags.
void Log(TraceParameters parameters);
- // Enable or disable instrumentation when an Instrument visitor is attached to
- // the simulator.
- void EnableInstrumentation();
- void DisableInstrumentation();
-
- // Add a marker to the instrumentation data produced by an Instrument visitor.
- // The name is a two character string that will be attached to the marker in
- // the output data.
- void AnnotateInstrumentation(const char* marker_name);
-
// Enable or disable CPU features dynamically. This mechanism allows users to
// strictly check the use of CPU features in different regions of code.
void SetSimulatorCPUFeatures(const CPUFeatures& features);
@@ -3661,6 +6662,36 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
Condition cond,
bool* should_synthesise_left);
+ // Generate code to calculate the address represented by `addr` and write it
+ // into `xd`. This is used as a common fall-back for out-of-range load and
+ // store operands.
+ //
+ // The vl_divisor_log2 argument is used to scale the VL, for use with
+ // SVE_MUL_VL.
+ void CalculateSVEAddress(const Register& xd,
+ const SVEMemOperand& addr,
+ int vl_divisor_log2 = 0);
+
+ void CalculateSVEAddress(const Register& xd,
+ const SVEMemOperand& addr,
+ const CPURegister& rt) {
+ VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister());
+ int vl_divisor_log2 = rt.IsPRegister() ? kZRegBitsPerPRegBitLog2 : 0;
+ CalculateSVEAddress(xd, addr, vl_divisor_log2);
+ }
+
+ void SetFPNaNPropagationOption(FPMacroNaNPropagationOption nan_option) {
+ fp_nan_propagation_ = nan_option;
+ }
+
+ void ResolveFPNaNPropagationOption(FPMacroNaNPropagationOption* nan_option) {
+ // The input option has priority over the option that has been set.
+ if (*nan_option == NoFPMacroNaNPropagationSelected) {
+ *nan_option = fp_nan_propagation_;
+ }
+ VIXL_ASSERT(*nan_option != NoFPMacroNaNPropagationSelected);
+ }
+
private:
// The actual Push and Pop implementations. These don't generate any code
// other than that required for the push or pop. This allows
@@ -3714,6 +6745,183 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
void ConfigureSimulatorCPUFeaturesHelper(const CPUFeatures& features,
DebugHltOpcode action);
+ void CompareHelper(Condition cond,
+ const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm);
+
+ // E.g. Ld1rb.
+ typedef void (Assembler::*SVELoadBroadcastFn)(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ void SVELoadBroadcastImmHelper(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr,
+ SVELoadBroadcastFn fn,
+ int divisor);
+
+ // E.g. ldr/str
+ typedef void (Assembler::*SVELoadStoreFn)(const CPURegister& rt,
+ const SVEMemOperand& addr);
+
+ void SVELoadStoreScalarImmHelper(const CPURegister& rt,
+ const SVEMemOperand& addr,
+ SVELoadStoreFn fn);
+
+ typedef void (Assembler::*SVELoad1Fn)(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ typedef void (Assembler::*SVEStore1Fn)(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr);
+
+ // Helper for predicated Z register loads with addressing modes not directly
+ // encodable in the instruction. The supported_modifier parameter indicates
+ // which offset modifier the calling instruction encoder supports (e.g.
+ // SVE_MUL_VL). The log2 of the ratio of VL to memory access size is passed as
+ // vl_divisor_log2; pass -1 to indicate no dependency.
+ template <typename Tg, typename Tf>
+ void SVELoadStoreScalarImmHelper(
+ const ZRegister& zt,
+ const Tg& pg,
+ const SVEMemOperand& addr,
+ Tf fn,
+ int imm_bits,
+ int shift_amount,
+ SVEOffsetModifier supported_modifier = NO_SVE_OFFSET_MODIFIER,
+ int vl_divisor_log2 = 0);
+
+ template <typename Tg, typename Tf>
+ void SVELoadStore1Helper(int msize_in_bytes_log2,
+ const ZRegister& zt,
+ const Tg& pg,
+ const SVEMemOperand& addr,
+ Tf fn);
+
+ template <typename Tf>
+ void SVELoadFFHelper(int msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr,
+ Tf fn);
+
+ typedef void (MacroAssembler::*IntWideImmMacroFn)(const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm);
+
+ typedef void (Assembler::*IntWideImmShiftFn)(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm,
+ int shift);
+
+ typedef void (Assembler::*IntArithFn)(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ typedef void (Assembler::*IntWideImmFn)(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm);
+
+ typedef void (Assembler::*IntArithIndexFn)(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ typedef void (MacroAssembler::*SVEArithPredicatedFn)(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ void IntWideImmHelper(IntWideImmFn imm_fn,
+ SVEArithPredicatedFn reg_fn,
+ const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm,
+ bool is_signed_imm);
+
+ enum AddSubHelperOption { kAddImmediate, kSubImmediate };
+
+ void AddSubHelper(AddSubHelperOption option,
+ const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm);
+
+ // Try to emit an add- or sub-like instruction (imm_fn) with `imm`, or the
+ // corresponding sub- or add-like instruction (n_imm_fn) with a negated `imm`.
+ // A `movprfx` is automatically generated if one is required. If successful,
+ // return true. Otherwise, return false.
+ //
+ // This helper uses two's complement equivalences, for example treating 0xffff
+ // as -1 for H-sized lanes.
+ bool TrySingleAddSub(AddSubHelperOption option,
+ const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm);
+
+ void SVESdotUdotHelper(IntArithFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ void SVESdotUdotIndexHelper(IntArithIndexFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // For noncommutative arithmetic operations.
+ void NoncommutativeArithmeticHelper(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ SVEArithPredicatedFn fn,
+ SVEArithPredicatedFn rev_fn);
+
+ void FPCommutativeArithmeticHelper(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ SVEArithPredicatedFn fn,
+ FPMacroNaNPropagationOption nan_option);
+
+ // Floating-point fused multiply-add vectors (predicated), writing addend.
+ typedef void (Assembler::*SVEMulAddPredicatedZdaFn)(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point fused multiply-add vectors (predicated), writing
+ // multiplicand.
+ typedef void (Assembler::*SVEMulAddPredicatedZdnFn)(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ void FPMulAddHelper(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ SVEMulAddPredicatedZdaFn fn_zda,
+ SVEMulAddPredicatedZdnFn fn_zdn,
+ FPMacroNaNPropagationOption nan_option);
+
+ typedef void (Assembler::*SVEMulAddIndexFn)(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ void FPMulAddIndexHelper(SVEMulAddIndexFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
// Tell whether any of the macro instruction can be used. When false the
// MacroAssembler will assert if a method which can emit a variable number
// of instructions is called.
@@ -3727,7 +6935,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
// Scratch registers available for use by the MacroAssembler.
CPURegList tmp_list_;
- CPURegList fptmp_list_;
+ CPURegList v_tmp_list_;
+ CPURegList p_tmp_list_;
UseScratchRegisterScope* current_scratch_scope_;
@@ -3737,6 +6946,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
ptrdiff_t checkpoint_;
ptrdiff_t recommended_checkpoint_;
+ FPMacroNaNPropagationOption fp_nan_propagation_;
+
friend class Pool;
friend class LiteralPool;
};
@@ -3805,11 +7016,35 @@ class BlockPoolsScope {
MacroAssembler* masm_;
};
+MovprfxHelperScope::MovprfxHelperScope(MacroAssembler* masm,
+ const ZRegister& dst,
+ const ZRegister& src)
+ : ExactAssemblyScope(masm,
+ ShouldGenerateMovprfx(dst, src)
+ ? (2 * kInstructionSize)
+ : kInstructionSize) {
+ if (ShouldGenerateMovprfx(dst, src)) {
+ masm->movprfx(dst, src);
+ }
+}
+
+MovprfxHelperScope::MovprfxHelperScope(MacroAssembler* masm,
+ const ZRegister& dst,
+ const PRegister& pg,
+ const ZRegister& src)
+ : ExactAssemblyScope(masm,
+ ShouldGenerateMovprfx(dst, pg, src)
+ ? (2 * kInstructionSize)
+ : kInstructionSize) {
+ if (ShouldGenerateMovprfx(dst, pg, src)) {
+ masm->movprfx(dst, pg, src);
+ }
+}
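+
+// A typical use, sketched with illustrative operands: a helper opens the scope
+// and then emits the destructive form of an instruction, for example:
+//   MovprfxHelperScope guard(this, zd, zn);
+//   add(zd, zd, imm8, shift);
+// This emits `movprfx zd, zn` first only when a prefix is actually needed
+// (for instance, when `zd` does not alias `zn`), and reserves space for one
+// or two instructions accordingly.
+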
// This scope utility allows scratch registers to be managed safely. The
-// MacroAssembler's GetScratchRegisterList() (and GetScratchVRegisterList()) is
-// used as a pool of scratch registers. These registers can be allocated on
-// demand, and will be returned at the end of the scope.
+// MacroAssembler's GetScratch*RegisterList() are used as a pool of scratch
+// registers. These registers can be allocated on demand, and will be returned
+// at the end of the scope.
//
// When the scope ends, the MacroAssembler's lists will be restored to their
// original state, even if the lists were modified by some other means.
@@ -3819,14 +7054,22 @@ class UseScratchRegisterScope {
// must not be `NULL`), so it is ready to use immediately after it has been
// constructed.
explicit UseScratchRegisterScope(MacroAssembler* masm)
- : masm_(NULL), parent_(NULL), old_available_(0), old_available_v_(0) {
+ : masm_(NULL),
+ parent_(NULL),
+ old_available_(0),
+ old_available_v_(0),
+ old_available_p_(0) {
Open(masm);
}
// This constructor does not implicitly initialise the scope. Instead, the
// user is required to explicitly call the `Open` function before using the
// scope.
UseScratchRegisterScope()
- : masm_(NULL), parent_(NULL), old_available_(0), old_available_v_(0) {}
+ : masm_(NULL),
+ parent_(NULL),
+ old_available_(0),
+ old_available_v_(0),
+ old_available_p_(0) {}
// This function performs the actual initialisation work.
void Open(MacroAssembler* masm);
@@ -3841,25 +7084,42 @@ class UseScratchRegisterScope {
bool IsAvailable(const CPURegister& reg) const;
-
// Take a register from the appropriate temps list. It will be returned
// automatically when the scope ends.
Register AcquireW() {
- return AcquireNextAvailable(masm_->GetScratchRegisterList()).W();
+ return AcquireFrom(masm_->GetScratchRegisterList()).W();
}
Register AcquireX() {
- return AcquireNextAvailable(masm_->GetScratchRegisterList()).X();
+ return AcquireFrom(masm_->GetScratchRegisterList()).X();
}
VRegister AcquireH() {
- return AcquireNextAvailable(masm_->GetScratchVRegisterList()).H();
+ return AcquireFrom(masm_->GetScratchVRegisterList()).H();
}
VRegister AcquireS() {
- return AcquireNextAvailable(masm_->GetScratchVRegisterList()).S();
+ return AcquireFrom(masm_->GetScratchVRegisterList()).S();
}
VRegister AcquireD() {
- return AcquireNextAvailable(masm_->GetScratchVRegisterList()).D();
+ return AcquireFrom(masm_->GetScratchVRegisterList()).D();
+ }
+ ZRegister AcquireZ() {
+ return AcquireFrom(masm_->GetScratchVRegisterList()).Z();
+ }
+ PRegister AcquireP() {
+ // Prefer to allocate p8-p15 if we can, to leave p0-p7 available for use as
+ // governing predicates.
+ CPURegList* available = masm_->GetScratchPRegisterList();
+ RegList preferred = ~kGoverningPRegisterMask;
+ if ((available->GetList() & preferred) != 0) {
+ return AcquireFrom(available, preferred).P();
+ }
+ return AcquireFrom(available).P();
+ }
+ // Acquire a P register suitable for use as a governing predicate in
+ // instructions which only accept p0-p7 for that purpose.
+ PRegister AcquireGoverningP() {
+ CPURegList* available = masm_->GetScratchPRegisterList();
+ return AcquireFrom(available, kGoverningPRegisterMask).P();
}
-
Register AcquireRegisterOfSize(int size_in_bits);
Register AcquireSameSizeAs(const Register& reg) {
@@ -3875,6 +7135,12 @@ class UseScratchRegisterScope {
: CPURegister(AcquireRegisterOfSize(size_in_bits));
}
+ // Acquire a register big enough to represent one lane of `vector`.
+ Register AcquireRegisterToHoldLane(const CPURegister& vector) {
+ VIXL_ASSERT(vector.GetLaneSizeInBits() <= kXRegSize);
+ return (vector.GetLaneSizeInBits() > kWRegSize) ? AcquireX() : AcquireW();
+ }
+
// Explicitly release an acquired (or excluded) register, putting it back in
// the appropriate temps list.
@@ -3892,6 +7158,10 @@ class UseScratchRegisterScope {
const VRegister& reg2 = NoVReg,
const VRegister& reg3 = NoVReg,
const VRegister& reg4 = NoVReg);
+ void Include(const CPURegister& reg1,
+ const CPURegister& reg2 = NoCPUReg,
+ const CPURegister& reg3 = NoCPUReg,
+ const CPURegister& reg4 = NoCPUReg);
// Make sure that the specified registers are not available in this scope.
@@ -3911,21 +7181,40 @@ class UseScratchRegisterScope {
const CPURegister& reg3 = NoCPUReg,
const CPURegister& reg4 = NoCPUReg);
+ // Convenience for excluding registers that are part of Operands. This is
+ // useful for sequences like this:
+ //
+ // // Use 'rd' as a scratch, but only if it's not aliased by an input.
+ // temps.Include(rd);
+ // temps.Exclude(rn);
+ // temps.Exclude(operand);
+ //
+ // Otherwise, a conditional check is needed on the last 'Exclude'.
+ void Exclude(const Operand& operand) {
+ if (operand.IsShiftedRegister() || operand.IsExtendedRegister()) {
+ Exclude(operand.GetRegister());
+ } else {
+ VIXL_ASSERT(operand.IsImmediate());
+ }
+ }
// Prevent any scratch registers from being used in this scope.
void ExcludeAll();
private:
- static CPURegister AcquireNextAvailable(CPURegList* available);
+ static CPURegister AcquireFrom(CPURegList* available,
+ RegList mask = ~static_cast<RegList>(0));
static void ReleaseByCode(CPURegList* available, int code);
-
static void ReleaseByRegList(CPURegList* available, RegList regs);
-
static void IncludeByRegList(CPURegList* available, RegList exclude);
-
static void ExcludeByRegList(CPURegList* available, RegList exclude);
+ CPURegList* GetAvailableListFor(CPURegister::RegisterBank bank);
+
+ static const RegList kGoverningPRegisterMask =
+ (static_cast<RegList>(1) << kNumberOfGoverningPRegisters) - 1;
+
// The MacroAssembler maintains a list of available scratch registers, and
// also keeps track of the most recently-opened scope so that on destruction
// we can check that scopes do not outlive their parents.
@@ -3934,7 +7223,8 @@ class UseScratchRegisterScope {
// The state of the available lists at the start of this scope.
RegList old_available_; // kRegister
- RegList old_available_v_; // kVRegister
+ RegList old_available_v_; // kVRegister / kZRegister
+ RegList old_available_p_; // kPRegister
// Disallow copy constructor and operator=.
VIXL_NO_RETURN_IN_DEBUG_MODE UseScratchRegisterScope(
@@ -3955,23 +7245,11 @@ class UseScratchRegisterScope {
// features needs a corresponding macro instruction.
class SimulationCPUFeaturesScope {
public:
- explicit SimulationCPUFeaturesScope(
- MacroAssembler* masm,
- CPUFeatures::Feature feature0 = CPUFeatures::kNone,
- CPUFeatures::Feature feature1 = CPUFeatures::kNone,
- CPUFeatures::Feature feature2 = CPUFeatures::kNone,
- CPUFeatures::Feature feature3 = CPUFeatures::kNone)
- : masm_(masm),
- cpu_features_scope_(masm, feature0, feature1, feature2, feature3) {
- masm_->SaveSimulatorCPUFeatures();
- masm_->EnableSimulatorCPUFeatures(
- CPUFeatures(feature0, feature1, feature2, feature3));
- }
-
- SimulationCPUFeaturesScope(MacroAssembler* masm, const CPUFeatures& other)
- : masm_(masm), cpu_features_scope_(masm, other) {
+ template <typename... T>
+ explicit SimulationCPUFeaturesScope(MacroAssembler* masm, T... features)
+ : masm_(masm), cpu_features_scope_(masm, features...) {
masm_->SaveSimulatorCPUFeatures();
- masm_->EnableSimulatorCPUFeatures(other);
+ masm_->EnableSimulatorCPUFeatures(CPUFeatures(features...));
}
~SimulationCPUFeaturesScope() { masm_->RestoreSimulatorCPUFeatures(); }
diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc
new file mode 100644
index 00000000..b107f132
--- /dev/null
+++ b/src/aarch64/macro-assembler-sve-aarch64.cc
@@ -0,0 +1,2027 @@
+// Copyright 2019, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "macro-assembler-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+void MacroAssembler::AddSubHelper(AddSubHelperOption option,
+ const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(imm.FitsInLane(zd));
+
+ // Simple, encodable cases.
+ if (TrySingleAddSub(option, zd, zn, imm)) return;
+
+ VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate));
+ bool add_imm = (option == kAddImmediate);
+
+ // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one
+ // instruction. Also interpret the immediate as signed, so we can convert
+ // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc.
+ IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits()));
+ if (signed_imm.IsNegative()) {
+ AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate;
+ IntegerOperand n_imm(signed_imm.GetMagnitude());
+ // IntegerOperand can represent -INT_MIN, so this is always safe.
+ VIXL_ASSERT(n_imm.IsPositiveOrZero());
+ if (TrySingleAddSub(n_option, zd, zn, n_imm)) return;
+ }
+
+ // Otherwise, fall back to dup + ADD_z_z/SUB_z_z.
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
+ Dup(scratch, imm);
+
+ SingleEmissionCheckScope guard(this);
+ if (add_imm) {
+ add(zd, zn, scratch);
+ } else {
+ sub(zd, zn, scratch);
+ }
+}
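+
+// For example (with placeholder registers): because the immediate is
+// reinterpreted as signed per lane, `Add(z0.VnH(), z1.VnH(), 0xffff)` becomes
+// a single `sub z0.h, z0.h, #1` (preceded by `movprfx z0, z1` when the
+// registers differ); only immediates with no encodable add or sub form fall
+// back to the `dup` plus vector `add`/`sub` sequence above.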
+
+bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option,
+ const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(imm.FitsInLane(zd));
+
+ int imm8;
+ int shift = -1;
+ if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
+ imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
+ MovprfxHelperScope guard(this, zd, zn);
+ switch (option) {
+ case kAddImmediate:
+ add(zd, zd, imm8, shift);
+ return true;
+ case kSubImmediate:
+ sub(zd, zd, imm8, shift);
+ return true;
+ }
+ }
+ return false;
+}
+
+void MacroAssembler::IntWideImmHelper(IntWideImmFn imm_fn,
+ SVEArithPredicatedFn reg_macro,
+ const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm,
+ bool is_signed) {
+ if (is_signed) {
+ // E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi
+ if (imm.IsInt8()) {
+ MovprfxHelperScope guard(this, zd, zn);
+ (this->*imm_fn)(zd, zd, imm.AsInt8());
+ return;
+ }
+ } else {
+ // E.g. UMIN_z_zi, UMAX_z_zi
+ if (imm.IsUint8()) {
+ MovprfxHelperScope guard(this, zd, zn);
+ (this->*imm_fn)(zd, zd, imm.AsUint8());
+ return;
+ }
+ }
+
+ UseScratchRegisterScope temps(this);
+ PRegister pg = temps.AcquireGoverningP();
+ Ptrue(pg.WithSameLaneSizeAs(zd));
+
+ // Try to re-use zd if we can, so we can avoid a movprfx.
+ ZRegister scratch =
+ zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits())
+ : zd;
+ Dup(scratch, imm);
+
+ // The vector-form macro for commutative operations will swap the arguments to
+ // avoid movprfx, if necessary.
+ (this->*reg_macro)(zd, pg.Merging(), zn, scratch);
+}
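+
+// A sketch of the two paths above, with placeholder registers:
+//   Smin(z0.VnS(), z1.VnS(), -5)   -> movprfx z0, z1 (if needed), then
+//                                     smin z0.s, z0.s, #-5
+//   Smin(z0.VnS(), z1.VnS(), 1000) -> ptrue a scratch governing predicate,
+//                                     dup a scratch Z register with 1000, then
+//                                     the predicated register form of Smin.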
+
+void MacroAssembler::Mul(const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ IntWideImmFn imm_fn = &Assembler::mul;
+ SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul;
+ IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
+}
+
+void MacroAssembler::Smin(const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(imm.FitsInSignedLane(zd));
+ IntWideImmFn imm_fn = &Assembler::smin;
+ SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin;
+ IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
+}
+
+void MacroAssembler::Smax(const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(imm.FitsInSignedLane(zd));
+ IntWideImmFn imm_fn = &Assembler::smax;
+ SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax;
+ IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
+}
+
+void MacroAssembler::Umax(const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
+ IntWideImmFn imm_fn = &Assembler::umax;
+ SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax;
+ IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
+}
+
+void MacroAssembler::Umin(const ZRegister& zd,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
+ IntWideImmFn imm_fn = &Assembler::umin;
+ SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin;
+ IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
+}
+
+void MacroAssembler::Addpl(const Register& xd,
+ const Register& xn,
+ int64_t multiplier) {
+ VIXL_ASSERT(allow_macro_instructions_);
+
+ // This macro relies on `Rdvl` to handle some out-of-range cases. Check that
+ // `VL * multiplier` cannot overflow, for any possible value of VL.
+ VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
+ VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
+
+ if (xd.IsZero()) return;
+ if (xn.IsZero() && xd.IsSP()) {
+ // TODO: This operation doesn't make much sense, but we could support it
+ // with a scratch register if necessary.
+ VIXL_UNIMPLEMENTED();
+ }
+
+  // Handling xzr requires an extra move, so defer it for now; we may still be
+  // able to use `rdvl` instead (via `Addvl`).
+ if (IsInt6(multiplier) && !xn.IsZero()) {
+ SingleEmissionCheckScope guard(this);
+ addpl(xd, xn, static_cast<int>(multiplier));
+ return;
+ }
+
+ // If `multiplier` is a multiple of 8, we can use `Addvl` instead.
+ if ((multiplier % kZRegBitsPerPRegBit) == 0) {
+ Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit);
+ return;
+ }
+
+ if (IsInt6(multiplier)) {
+ VIXL_ASSERT(xn.IsZero()); // Other cases were handled with `addpl`.
+ // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so
+ // materialise a zero.
+ MacroEmissionCheckScope guard(this);
+ movz(xd, 0);
+ addpl(xd, xd, static_cast<int>(multiplier));
+ return;
+ }
+
+  // TODO: Some plausible cases result in rather long sequences. For example,
+ // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just
+ // outside the encodable range. We should look for ways to cover such cases
+ // without drastically increasing the complexity of this logic.
+
+ // For other cases, calculate xn + (PL * multiplier) using discrete
+ // instructions. This requires two scratch registers in the general case, so
+ // try to re-use the destination as a scratch register.
+ UseScratchRegisterScope temps(this);
+ temps.Include(xd);
+ temps.Exclude(xn);
+
+ Register scratch = temps.AcquireX();
+  // There is no `rdpl` instruction, so we have to calculate PL from VL. We
+  // can't scale the multiplier because (we already know) it isn't a multiple
+  // of 8.
+ Rdvl(scratch, multiplier);
+
+ MacroEmissionCheckScope guard(this);
+ if (xn.IsZero()) {
+ asr(xd, scratch, kZRegBitsPerPRegBitLog2);
+ } else if (xd.IsSP() || xn.IsSP()) {
+ // TODO: MacroAssembler::Add should be able to handle this.
+ asr(scratch, scratch, kZRegBitsPerPRegBitLog2);
+ add(xd, xn, scratch);
+ } else {
+ add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2));
+ }
+}
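The multiplier handling above can be summarised with a short sketch (assumptions as in the earlier sketches; the sequences named in the comments follow the branches of `Addpl`):

void AddplSketch(MacroAssembler* masm) {
  masm->Addpl(x0, x1, 3);    // Encodable: a single addpl.
  masm->Addpl(x0, x1, 40);   // Multiple of 8: forwarded to Addvl(x0, x1, 5).
  masm->Addpl(x0, x1, 100);  // Otherwise: an rdvl-based shift/add sequence.
}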
+
+void MacroAssembler::Addvl(const Register& xd,
+ const Register& xn,
+ int64_t multiplier) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(xd.IsX());
+ VIXL_ASSERT(xn.IsX());
+
+ // Check that `VL * multiplier` cannot overflow, for any possible value of VL.
+ VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
+ VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
+
+ if (xd.IsZero()) return;
+ if (xn.IsZero() && xd.IsSP()) {
+ // TODO: This operation doesn't make much sense, but we could support it
+ // with a scratch register if necessary. `rdvl` cannot write into `sp`.
+ VIXL_UNIMPLEMENTED();
+ }
+
+ if (IsInt6(multiplier)) {
+ SingleEmissionCheckScope guard(this);
+ if (xn.IsZero()) {
+ rdvl(xd, static_cast<int>(multiplier));
+ } else {
+ addvl(xd, xn, static_cast<int>(multiplier));
+ }
+ return;
+ }
+
+  // TODO: Some plausible cases result in rather long sequences. For example,
+ // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just
+ // outside the encodable range. We should look for ways to cover such cases
+ // without drastically increasing the complexity of this logic.
+
+ // For other cases, calculate xn + (VL * multiplier) using discrete
+ // instructions. This requires two scratch registers in the general case, so
+ // we try to re-use the destination as a scratch register.
+ UseScratchRegisterScope temps(this);
+ temps.Include(xd);
+ temps.Exclude(xn);
+
+ Register a = temps.AcquireX();
+ Mov(a, multiplier);
+
+ MacroEmissionCheckScope guard(this);
+ Register b = temps.AcquireX();
+ rdvl(b, 1);
+ if (xn.IsZero()) {
+ mul(xd, a, b);
+ } else if (xd.IsSP() || xn.IsSP()) {
+ mul(a, a, b);
+ add(xd, xn, a);
+ } else {
+ madd(xd, a, b, xn);
+ }
+}
+
+void MacroAssembler::CalculateSVEAddress(const Register& xd,
+ const SVEMemOperand& addr,
+ int vl_divisor_log2) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(!addr.IsScatterGather());
+ VIXL_ASSERT(xd.IsX());
+
+ // The lower bound is where a whole Z register is accessed.
+ VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0));
+ // The upper bound is for P register accesses, and for instructions like
+ // "st1b { z0.d } [...]", where one byte is accessed for every D-sized lane.
+ VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2));
+
+ SVEOffsetModifier mod = addr.GetOffsetModifier();
+ Register base = addr.GetScalarBase();
+
+ if (addr.IsEquivalentToScalar()) {
+ // For example:
+ // [x0]
+ // [x0, #0]
+ // [x0, xzr, LSL 2]
+ Mov(xd, base);
+ } else if (addr.IsScalarPlusImmediate()) {
+ // For example:
+ // [x0, #42]
+ // [x0, #42, MUL VL]
+ int64_t offset = addr.GetImmediateOffset();
+ VIXL_ASSERT(offset != 0); // Handled by IsEquivalentToScalar.
+ if (addr.IsMulVl()) {
+ int vl_divisor = 1 << vl_divisor_log2;
+ // For all possible values of vl_divisor, we can simply use `Addpl`. This
+ // will select `addvl` if necessary.
+ VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0);
+ Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor));
+ } else {
+ // IsScalarPlusImmediate() ensures that no other modifiers can occur.
+ VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
+ Add(xd, base, offset);
+ }
+ } else if (addr.IsScalarPlusScalar()) {
+ // For example:
+ // [x0, x1]
+ // [x0, x1, LSL #4]
+ Register offset = addr.GetScalarOffset();
+ VIXL_ASSERT(!offset.IsZero()); // Handled by IsEquivalentToScalar.
+ if (mod == SVE_LSL) {
+ Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount()));
+ } else {
+ // IsScalarPlusScalar() ensures that no other modifiers can occur.
+ VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
+ Add(xd, base, offset);
+ }
+ } else {
+ // All other forms are scatter-gather addresses, which cannot be evaluated
+ // into an X register.
+ VIXL_UNREACHABLE();
+ }
+}
+
+void MacroAssembler::Cpy(const ZRegister& zd,
+ const PRegister& pg,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(imm.FitsInLane(zd));
+ int imm8;
+ int shift;
+ if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
+ imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
+ SingleEmissionCheckScope guard(this);
+ cpy(zd, pg, imm8, shift);
+ return;
+ }
+
+ // The fallbacks rely on `cpy` variants that only support merging predication.
+ // If zeroing predication was requested, zero the destination first.
+ if (pg.IsZeroing()) {
+ SingleEmissionCheckScope guard(this);
+ dup(zd, 0);
+ }
+ PRegisterM pg_m = pg.Merging();
+
+ // Try to encode the immediate using fcpy.
+ VIXL_ASSERT(imm.FitsInLane(zd));
+ if (zd.GetLaneSizeInBits() >= kHRegSize) {
+ double fp_imm = 0.0;
+ switch (zd.GetLaneSizeInBits()) {
+ case kHRegSize:
+ fp_imm =
+ FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN);
+ break;
+ case kSRegSize:
+ fp_imm = RawbitsToFloat(imm.AsUint32());
+ break;
+ case kDRegSize:
+ fp_imm = RawbitsToDouble(imm.AsUint64());
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ break;
+ }
+ // IsImmFP64 is equivalent to IsImmFP<n> for the same arithmetic value, so
+ // we can use IsImmFP64 for all lane sizes.
+ if (IsImmFP64(fp_imm)) {
+ SingleEmissionCheckScope guard(this);
+ fcpy(zd, pg_m, fp_imm);
+ return;
+ }
+ }
+
+ // Fall back to using a scratch register.
+ UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireRegisterToHoldLane(zd);
+ Mov(scratch, imm);
+
+ SingleEmissionCheckScope guard(this);
+ cpy(zd, pg_m, scratch);
+}
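A sketch of `Cpy`'s encoding preferences (assumptions as before; each comment names the branch taken above):

void CpySketch(MacroAssembler* masm) {
  masm->Cpy(z0.VnH(), p0.Merging(), -42);         // Shifted imm8: cpy.
  masm->Cpy(z1.VnS(), p1.Merging(), 0x3fc00000);  // Raw bits of 1.5f: fcpy.
  masm->Cpy(z2.VnD(), p0.Zeroing(), 12345);       // Zeroing, unencodable: dup #0,
                                                  // then a merging cpy from a
                                                  // scratch X register.
}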
+
+// TODO: We implement Fcpy (amongst other things) for all FP types because it
+// allows us to preserve user-specified NaNs. We should come up with some
+// FPImmediate type to abstract this, and avoid all the duplication below (and
+// elsewhere).
+
+void MacroAssembler::Fcpy(const ZRegister& zd,
+ const PRegisterM& pg,
+ double imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(pg.IsMerging());
+
+ if (IsImmFP64(imm)) {
+ SingleEmissionCheckScope guard(this);
+ fcpy(zd, pg, imm);
+ return;
+ }
+
+ // As a fall-back, cast the immediate to the required lane size, and try to
+ // encode the bit pattern using `Cpy`.
+ Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
+}
+
+void MacroAssembler::Fcpy(const ZRegister& zd,
+ const PRegisterM& pg,
+ float imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(pg.IsMerging());
+
+ if (IsImmFP32(imm)) {
+ SingleEmissionCheckScope guard(this);
+ fcpy(zd, pg, imm);
+ return;
+ }
+
+ // As a fall-back, cast the immediate to the required lane size, and try to
+ // encode the bit pattern using `Cpy`.
+ Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
+}
+
+void MacroAssembler::Fcpy(const ZRegister& zd,
+ const PRegisterM& pg,
+ Float16 imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(pg.IsMerging());
+
+ if (IsImmFP16(imm)) {
+ SingleEmissionCheckScope guard(this);
+ fcpy(zd, pg, imm);
+ return;
+ }
+
+ // As a fall-back, cast the immediate to the required lane size, and try to
+ // encode the bit pattern using `Cpy`.
+ Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
+}
+
+void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(imm.FitsInLane(zd));
+ unsigned lane_size = zd.GetLaneSizeInBits();
+ int imm8;
+ int shift;
+ if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
+ imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
+ SingleEmissionCheckScope guard(this);
+ dup(zd, imm8, shift);
+ } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) {
+ SingleEmissionCheckScope guard(this);
+ dupm(zd, imm.AsUintN(lane_size));
+ } else {
+ UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireRegisterToHoldLane(zd);
+ Mov(scratch, imm);
+
+ SingleEmissionCheckScope guard(this);
+ dup(zd, scratch);
+ }
+}
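And for `Dup` (assumptions as before):

void DupSketch(MacroAssembler* masm) {
  masm->Dup(z0.VnB(), -1);           // Shifted imm8: dup.
  masm->Dup(z1.VnS(), 0x00ff00ff);   // Logical immediate: dupm.
  masm->Dup(z2.VnD(), 0x123456789);  // Neither: mov to a scratch X, then dup.
}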
+
+void MacroAssembler::NoncommutativeArithmeticHelper(
+ const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ SVEArithPredicatedFn fn,
+ SVEArithPredicatedFn rev_fn) {
+ if (zd.Aliases(zn)) {
+ // E.g. zd = zd / zm
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zd, pg, zn, zm);
+ } else if (zd.Aliases(zm)) {
+ // E.g. zd = zn / zd
+ SingleEmissionCheckScope guard(this);
+ (this->*rev_fn)(zd, pg, zm, zn);
+ } else {
+ // E.g. zd = zn / zm
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ (this->*fn)(zd, pg, zd, zm);
+ }
+}
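The reversed-operand trick above is easiest to see from call sites (assumptions as before; `Fsub` and `Fdiv`, defined below, route through this helper):

void ReversedFormSketch(MacroAssembler* masm) {
  // zd aliases zm, so the reversed form (fsubr) is used; no scratch needed.
  masm->Fsub(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS());
  // All registers distinct: movprfx from zn, then the plain form (fdiv).
  masm->Fdiv(z2.VnD(), p0.Merging(), z3.VnD(), z4.VnD());
}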
+
+void MacroAssembler::FPCommutativeArithmeticHelper(
+ const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ SVEArithPredicatedFn fn,
+ FPMacroNaNPropagationOption nan_option) {
+ ResolveFPNaNPropagationOption(&nan_option);
+
+ if (zd.Aliases(zn)) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zd, pg, zd, zm);
+ } else if (zd.Aliases(zm)) {
+ switch (nan_option) {
+ case FastNaNPropagation: {
+ // Swap the arguments.
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zd, pg, zd, zn);
+ return;
+ }
+ case StrictNaNPropagation: {
+ UseScratchRegisterScope temps(this);
+ // Use a scratch register to keep the argument order exactly as
+ // specified.
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
+ {
+ MovprfxHelperScope guard(this, scratch, pg, zn);
+ (this->*fn)(scratch, pg, scratch, zm);
+ }
+ Mov(zd, scratch);
+ return;
+ }
+ case NoFPMacroNaNPropagationSelected:
+ VIXL_UNREACHABLE();
+ return;
+ }
+ } else {
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ (this->*fn)(zd, pg, zd, zm);
+ }
+}
+
+void MacroAssembler::Asr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ NoncommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::asr),
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::asrr));
+}
+
+void MacroAssembler::Lsl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ NoncommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::lsl),
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::lslr));
+}
+
+void MacroAssembler::Lsr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ NoncommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::lsr),
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::lsrr));
+}
+
+void MacroAssembler::Fdiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ NoncommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fdiv),
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fdivr));
+}
+
+void MacroAssembler::Fsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ NoncommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fsub),
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fsubr));
+}
+
+void MacroAssembler::Fadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPCommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fadd),
+ nan_option);
+}
+
+void MacroAssembler::Fabd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPCommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fabd),
+ nan_option);
+}
+
+void MacroAssembler::Fmul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPCommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fmul),
+ nan_option);
+}
+
+void MacroAssembler::Fmulx(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPCommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fmulx),
+ nan_option);
+}
+
+void MacroAssembler::Fmax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPCommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fmax),
+ nan_option);
+}
+
+void MacroAssembler::Fmin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPCommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fmin),
+ nan_option);
+}
+
+void MacroAssembler::Fmaxnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPCommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fmaxnm),
+ nan_option);
+}
+
+void MacroAssembler::Fminnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPCommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::fminnm),
+ nan_option);
+}
+
+void MacroAssembler::Fdup(const ZRegister& zd, double imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+
+ switch (zd.GetLaneSizeInBits()) {
+ case kHRegSize:
+ Fdup(zd, Float16(imm));
+ break;
+ case kSRegSize:
+ Fdup(zd, static_cast<float>(imm));
+ break;
+ case kDRegSize:
+ if (IsImmFP64(imm)) {
+ SingleEmissionCheckScope guard(this);
+ fdup(zd, imm);
+ } else {
+ Dup(zd, DoubleToRawbits(imm));
+ }
+ break;
+ }
+}
+
+void MacroAssembler::Fdup(const ZRegister& zd, float imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+
+ switch (zd.GetLaneSizeInBits()) {
+ case kHRegSize:
+ Fdup(zd, Float16(imm));
+ break;
+ case kSRegSize:
+ if (IsImmFP32(imm)) {
+ SingleEmissionCheckScope guard(this);
+ fdup(zd, imm);
+ } else {
+ Dup(zd, FloatToRawbits(imm));
+ }
+ break;
+ case kDRegSize:
+ Fdup(zd, static_cast<double>(imm));
+ break;
+ }
+}
+
+void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+
+ switch (zd.GetLaneSizeInBits()) {
+ case kHRegSize:
+ if (IsImmFP16(imm)) {
+ SingleEmissionCheckScope guard(this);
+ fdup(zd, imm);
+ } else {
+ Dup(zd, Float16ToRawbits(imm));
+ }
+ break;
+ case kSRegSize:
+ Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN));
+ break;
+ case kDRegSize:
+ Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
+ break;
+ }
+}
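A quick sketch of the `Fdup` fallback (assumptions as before):

void FdupSketch(MacroAssembler* masm) {
  masm->Fdup(z0.VnD(), 1.5);   // Encodable FP8 immediate: fdup.
  masm->Fdup(z1.VnS(), 1.1f);  // Not encodable: Dup of the raw bit pattern.
}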
+
+void MacroAssembler::Index(const ZRegister& zd,
+ const Operand& start,
+ const Operand& step) {
+ class IndexOperand : public Operand {
+ public:
+ static IndexOperand Prepare(MacroAssembler* masm,
+ UseScratchRegisterScope* temps,
+ const Operand& op,
+ const ZRegister& zd) {
+ // Look for encodable immediates.
+ int imm;
+ if (op.IsImmediate()) {
+ if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd, &imm)) {
+ return IndexOperand(imm);
+ }
+ Register scratch = temps->AcquireRegisterToHoldLane(zd);
+ masm->Mov(scratch, op);
+ return IndexOperand(scratch);
+ } else {
+ // Plain registers can be encoded directly.
+ VIXL_ASSERT(op.IsPlainRegister());
+ return IndexOperand(op.GetRegister());
+ }
+ }
+
+ int GetImm5() const {
+ int64_t imm = GetImmediate();
+ VIXL_ASSERT(IsInt5(imm));
+ return static_cast<int>(imm);
+ }
+
+ private:
+ explicit IndexOperand(const Register& reg) : Operand(reg) {}
+ explicit IndexOperand(int64_t imm) : Operand(imm) {}
+ };
+
+ UseScratchRegisterScope temps(this);
+ IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd);
+ IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd);
+
+ SingleEmissionCheckScope guard(this);
+ if (start_enc.IsImmediate()) {
+ if (step_enc.IsImmediate()) {
+ index(zd, start_enc.GetImm5(), step_enc.GetImm5());
+ } else {
+ index(zd, start_enc.GetImm5(), step_enc.GetRegister());
+ }
+ } else {
+ if (step_enc.IsImmediate()) {
+ index(zd, start_enc.GetRegister(), step_enc.GetImm5());
+ } else {
+ index(zd, start_enc.GetRegister(), step_enc.GetRegister());
+ }
+ }
+}
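Usage sketch for `Index` (assumptions as before):

void IndexSketch(MacroAssembler* masm) {
  masm->Index(z0.VnB(), 0, 1);      // Both operands fit imm5: index #0, #1.
  masm->Index(z1.VnD(), x0, -100);  // Step out of range: synthesised in a
                                    // scratch X register first.
}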
+
+void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(imm.FitsInLane(zdn));
+
+ if (imm.IsZero()) {
+ SingleEmissionCheckScope guard(this);
+ insr(zdn, xzr);
+ return;
+ }
+
+ UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireRegisterToHoldLane(zdn);
+
+ // TODO: There are many cases where we could optimise immediates, such as by
+ // detecting repeating patterns or FP immediates. We should optimise and
+ // abstract this for use in other SVE mov-immediate-like macros.
+ Mov(scratch, imm);
+
+ SingleEmissionCheckScope guard(this);
+ insr(zdn, scratch);
+}
+
+void MacroAssembler::Mla(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (zd.Aliases(za)) {
+ // zda = zda + (zn * zm)
+ SingleEmissionCheckScope guard(this);
+ mla(zd, pg, zn, zm);
+ } else if (zd.Aliases(zn)) {
+ // zdn = za + (zdn * zm)
+ SingleEmissionCheckScope guard(this);
+ mad(zd, pg, zm, za);
+ } else if (zd.Aliases(zm)) {
+ // Multiplication is commutative, so we can swap zn and zm.
+ // zdm = za + (zdm * zn)
+ SingleEmissionCheckScope guard(this);
+ mad(zd, pg, zn, za);
+ } else {
+ // zd = za + (zn * zm)
+ ExactAssemblyScope guard(this, 2 * kInstructionSize);
+ movprfx(zd, pg, za);
+ mla(zd, pg, zn, zm);
+ }
+}
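The aliasing cases above, from the caller's point of view (assumptions as before):

void MlaSketch(MacroAssembler* masm) {
  masm->Mla(z0.VnS(), p0.Merging(), z0.VnS(), z1.VnS(), z2.VnS());  // mla.
  masm->Mla(z1.VnS(), p0.Merging(), z0.VnS(), z1.VnS(), z2.VnS());  // mad.
  masm->Mla(z3.VnS(), p0.Merging(), z0.VnS(), z1.VnS(), z2.VnS());  // movprfx + mla.
}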
+
+void MacroAssembler::Mls(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (zd.Aliases(za)) {
+ // zda = zda - (zn * zm)
+ SingleEmissionCheckScope guard(this);
+ mls(zd, pg, zn, zm);
+ } else if (zd.Aliases(zn)) {
+ // zdn = za - (zdn * zm)
+ SingleEmissionCheckScope guard(this);
+ msb(zd, pg, zm, za);
+ } else if (zd.Aliases(zm)) {
+ // Multiplication is commutative, so we can swap zn and zm.
+ // zdm = za - (zdm * zn)
+ SingleEmissionCheckScope guard(this);
+ msb(zd, pg, zn, za);
+ } else {
+ // zd = za - (zn * zm)
+ ExactAssemblyScope guard(this, 2 * kInstructionSize);
+ movprfx(zd, pg, za);
+ mls(zd, pg, zn, zm);
+ }
+}
+
+void MacroAssembler::CompareHelper(Condition cond,
+ const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ UseScratchRegisterScope temps(this);
+ ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
+ Dup(zm, imm);
+ SingleEmissionCheckScope guard(this);
+ cmp(cond, pd, pg, zn, zm);
+}
+
+void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(pd.IsLaneSizeB());
+ VIXL_ASSERT(pn.IsLaneSizeB());
+ if (pd.Is(pn)) {
+ SingleEmissionCheckScope guard(this);
+ pfirst(pd, pg, pn);
+ } else {
+ UseScratchRegisterScope temps(this);
+ PRegister temp_pg = pg;
+ if (pd.Aliases(pg)) {
+ temp_pg = temps.AcquireP();
+ Mov(temp_pg.VnB(), pg.VnB());
+ }
+ Mov(pd, pn);
+ SingleEmissionCheckScope guard(this);
+ pfirst(pd, temp_pg, pd);
+ }
+}
+
+void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(AreSameFormat(pd, pn));
+ if (pd.Is(pn)) {
+ SingleEmissionCheckScope guard(this);
+ pnext(pd, pg, pn);
+ } else {
+ UseScratchRegisterScope temps(this);
+ PRegister temp_pg = pg;
+ if (pd.Aliases(pg)) {
+ temp_pg = temps.AcquireP();
+ Mov(temp_pg.VnB(), pg.VnB());
+ }
+ Mov(pd.VnB(), pn.VnB());
+ SingleEmissionCheckScope guard(this);
+ pnext(pd, temp_pg, pd);
+ }
+}
+
+void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd,
+ SVEPredicateConstraint pattern,
+ FlagsUpdate s) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ switch (s) {
+ case LeaveFlags:
+ Ptrue(pd, pattern);
+ return;
+ case SetFlags:
+ Ptrues(pd, pattern);
+ return;
+ }
+ VIXL_UNREACHABLE();
+}
+
+void MacroAssembler::Sdiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ NoncommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::sdiv),
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::sdivr));
+}
+
+void MacroAssembler::Sub(const ZRegister& zd,
+ IntegerOperand imm,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+
+ int imm8;
+ int shift = -1;
+ if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
+ imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
+ MovprfxHelperScope guard(this, zd, zm);
+ subr(zd, zd, imm8, shift);
+ } else {
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits());
+ Dup(scratch, imm);
+
+ SingleEmissionCheckScope guard(this);
+ sub(zd, scratch, zm);
+ }
+}
+
+void MacroAssembler::Sub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ NoncommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::sub),
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::subr));
+}
+
+void MacroAssembler::Udiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ NoncommutativeArithmeticHelper(zd,
+ pg,
+ zn,
+ zm,
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::udiv),
+ static_cast<SVEArithPredicatedFn>(
+ &Assembler::udivr));
+}
+
+void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr,
+ SVELoadBroadcastFn fn,
+ int divisor) {
+ VIXL_ASSERT(addr.IsScalarPlusImmediate());
+ int64_t imm = addr.GetImmediateOffset();
+ if ((imm % divisor == 0) && IsUint6(imm / divisor)) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, addr);
+ } else {
+ UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX();
+ CalculateSVEAddress(scratch, addr, zt);
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, SVEMemOperand(scratch));
+ }
+}
+
+void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt,
+ const SVEMemOperand& addr,
+ SVELoadStoreFn fn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister());
+
+ if (addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) &&
+ addr.IsMulVl())) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(rt, addr);
+ return;
+ }
+
+ if (addr.IsEquivalentToScalar()) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase()));
+ return;
+ }
+
+ UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX();
+ CalculateSVEAddress(scratch, addr, rt);
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(rt, SVEMemOperand(scratch));
+}
+
+template <typename Tg, typename Tf>
+void MacroAssembler::SVELoadStoreScalarImmHelper(
+ const ZRegister& zt,
+ const Tg& pg,
+ const SVEMemOperand& addr,
+ Tf fn,
+ int imm_bits,
+ int shift_amount,
+ SVEOffsetModifier supported_modifier,
+ int vl_divisor_log2) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm_divisor = 1 << shift_amount;
+
+ if (addr.IsPlainScalar() ||
+ (addr.IsScalarPlusImmediate() &&
+ IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) &&
+ ((addr.GetImmediateOffset() % imm_divisor) == 0) &&
+ (addr.GetOffsetModifier() == supported_modifier))) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, addr);
+ return;
+ }
+
+ if (addr.IsEquivalentToScalar()) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
+ return;
+ }
+
+ if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) &&
+ (vl_divisor_log2 == -1)) {
+    // We don't handle [x0, #imm, MUL VL] if the in-memory access size is not
+    // VL-dependent.
+ VIXL_UNIMPLEMENTED();
+ }
+
+ UseScratchRegisterScope temps(this);
+ Register scratch = temps.AcquireX();
+ CalculateSVEAddress(scratch, addr, vl_divisor_log2);
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, SVEMemOperand(scratch));
+}
+
+template <typename Tg, typename Tf>
+void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2,
+ const ZRegister& zt,
+ const Tg& pg,
+ const SVEMemOperand& addr,
+ Tf fn) {
+ if (addr.IsPlainScalar() ||
+ (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
+ addr.IsEquivalentToLSL(msize_in_bytes_log2)) ||
+ (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) &&
+ addr.IsMulVl())) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, addr);
+ return;
+ }
+
+ if (addr.IsEquivalentToScalar()) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
+ return;
+ }
+
+ if (addr.IsVectorPlusImmediate()) {
+ uint64_t offset = addr.GetImmediateOffset();
+ if (IsMultiple(offset, (1 << msize_in_bytes_log2)) &&
+ IsUint5(offset >> msize_in_bytes_log2)) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, addr);
+ return;
+ }
+ }
+
+ if (addr.IsScalarPlusVector()) {
+ VIXL_ASSERT(addr.IsScatterGather());
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, addr);
+ return;
+ }
+
+ UseScratchRegisterScope temps(this);
+ if (addr.IsScatterGather()) {
+ // In scatter-gather modes, zt and zn/zm have the same lane size. However,
+ // for 32-bit accesses, the result of each lane's address calculation still
+ // requires 64 bits; we can't naively use `Adr` for the address calculation
+ // because it would truncate each address to 32 bits.
+
+ if (addr.IsVectorPlusImmediate()) {
+ // Synthesise the immediate in an X register, then use a
+ // scalar-plus-vector access with the original vector.
+ Register scratch = temps.AcquireX();
+ Mov(scratch, addr.GetImmediateOffset());
+ SingleEmissionCheckScope guard(this);
+ SVEOffsetModifier om =
+ zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER;
+ (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om));
+ return;
+ }
+
+ VIXL_UNIMPLEMENTED();
+ } else {
+ Register scratch = temps.AcquireX();
+ // TODO: If we have an immediate offset that is a multiple of
+ // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to
+ // save an instruction.
+ int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2;
+ CalculateSVEAddress(scratch, addr, vl_divisor_log2);
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, SVEMemOperand(scratch));
+ }
+}
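To illustrate the address-form dispatch above, a sketch using `Ld1w`, which is defined further down (assumptions as before; the SVEMemOperand constructor forms and `SVE_MUL_VL` are used here as the helper expects them):

void Ld1wSketch(MacroAssembler* masm) {
  // Encodable scalar-plus-immediate (MUL VL) form: emitted directly.
  masm->Ld1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
  // Plain byte offset: the address is materialised in a scratch register.
  masm->Ld1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, 42));
}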
+
+template <typename Tf>
+void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2,
+ const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr,
+ Tf fn) {
+ if (addr.IsScatterGather()) {
+ // Scatter-gather first-fault loads share encodings with normal loads.
+ SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn);
+ return;
+ }
+
+ // Contiguous first-faulting loads have no scalar-plus-immediate form at all,
+ // so we don't do immediate synthesis.
+
+ // We cannot currently distinguish "[x0]" from "[x0, #0]", and this
+ // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here.
+ if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() &&
+ addr.IsEquivalentToLSL(msize_in_bytes_log2))) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, addr);
+ return;
+ }
+
+ VIXL_UNIMPLEMENTED();
+}
+
+void MacroAssembler::Ld1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStore1Helper(kBRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ld1b));
+}
+
+void MacroAssembler::Ld1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStore1Helper(kHRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ld1h));
+}
+
+void MacroAssembler::Ld1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStore1Helper(kWRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ld1w));
+}
+
+void MacroAssembler::Ld1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStore1Helper(kDRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ld1d));
+}
+
+void MacroAssembler::Ld1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStore1Helper(kBRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ld1sb));
+}
+
+void MacroAssembler::Ld1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStore1Helper(kHRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ld1sh));
+}
+
+void MacroAssembler::Ld1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStore1Helper(kSRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ld1sw));
+}
+
+void MacroAssembler::St1b(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStore1Helper(kBRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVEStore1Fn>(&Assembler::st1b));
+}
+
+void MacroAssembler::St1h(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStore1Helper(kHRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVEStore1Fn>(&Assembler::st1h));
+}
+
+void MacroAssembler::St1w(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStore1Helper(kSRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVEStore1Fn>(&Assembler::st1w));
+}
+
+void MacroAssembler::St1d(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStore1Helper(kDRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVEStore1Fn>(&Assembler::st1d));
+}
+
+void MacroAssembler::Ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadFFHelper(kBRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ldff1b));
+}
+
+void MacroAssembler::Ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadFFHelper(kHRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ldff1h));
+}
+
+void MacroAssembler::Ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadFFHelper(kSRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ldff1w));
+}
+
+void MacroAssembler::Ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadFFHelper(kDRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ldff1d));
+}
+
+void MacroAssembler::Ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadFFHelper(kBRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ldff1sb));
+}
+
+void MacroAssembler::Ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadFFHelper(kHRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ldff1sh));
+}
+
+void MacroAssembler::Ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadFFHelper(kSRegSizeInBytesLog2,
+ zt,
+ pg,
+ addr,
+ static_cast<SVELoad1Fn>(&Assembler::ldff1sw));
+}
+
+void MacroAssembler::Ld1rqb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ld1rqb,
+ 4,
+ 4,
+ NO_SVE_OFFSET_MODIFIER,
+ -1);
+}
+
+void MacroAssembler::Ld1rqd(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ld1rqd,
+ 4,
+ 4,
+ NO_SVE_OFFSET_MODIFIER,
+ -1);
+}
+
+void MacroAssembler::Ld1rqh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ld1rqh,
+ 4,
+ 4,
+ NO_SVE_OFFSET_MODIFIER,
+ -1);
+}
+
+void MacroAssembler::Ld1rqw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ld1rqw,
+ 4,
+ 4,
+ NO_SVE_OFFSET_MODIFIER,
+ -1);
+}
+
+void MacroAssembler::Ldnt1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ldnt1b,
+ 4,
+ 0,
+ SVE_MUL_VL);
+}
+
+void MacroAssembler::Ldnt1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ldnt1d,
+ 4,
+ 0,
+ SVE_MUL_VL);
+}
+
+void MacroAssembler::Ldnt1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ldnt1h,
+ 4,
+ 0,
+ SVE_MUL_VL);
+}
+
+void MacroAssembler::Ldnt1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ldnt1w,
+ 4,
+ 0,
+ SVE_MUL_VL);
+}
+
+void MacroAssembler::Stnt1b(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::stnt1b,
+ 4,
+ 0,
+ SVE_MUL_VL);
+}
+void MacroAssembler::Stnt1d(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::stnt1d,
+ 4,
+ 0,
+ SVE_MUL_VL);
+}
+void MacroAssembler::Stnt1h(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::stnt1h,
+ 4,
+ 0,
+ SVE_MUL_VL);
+}
+void MacroAssembler::Stnt1w(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVELoadStoreScalarImmHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::stnt1w,
+ 4,
+ 0,
+ SVE_MUL_VL);
+}
+
+void MacroAssembler::SVESdotUdotIndexHelper(IntArithIndexFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ if (zd.Aliases(za)) {
+    // zda = zda + (zn . zm[index])
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zd, zn, zm, index);
+
+ } else if (zd.Aliases(zn) || zd.Aliases(zm)) {
+ // zdn = za + (zdn . zm[index])
+ // zdm = za + (zn . zdm[index])
+ // zdnm = za + (zdnm . zdnm[index])
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, scratch, za);
+ (this->*fn)(scratch, zn, zm, index);
+ }
+
+ Mov(zd, scratch);
+ } else {
+    // zd = za + (zn . zm[index])
+ MovprfxHelperScope guard(this, zd, za);
+ (this->*fn)(zd, zn, zm, index);
+ }
+}
+
+void MacroAssembler::SVESdotUdotHelper(IntArithFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ if (zd.Aliases(za)) {
+ // zda = zda + (zn . zm)
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zd, zn, zm);
+
+ } else if (zd.Aliases(zn) || zd.Aliases(zm)) {
+ // zdn = za + (zdn . zm)
+ // zdm = za + (zn . zdm)
+ // zdnm = za + (zdnm . zdnm)
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, scratch, za);
+ (this->*fn)(scratch, zn, zm);
+ }
+
+ Mov(zd, scratch);
+ } else {
+ // zd = za + (zn . zm)
+ MovprfxHelperScope guard(this, zd, za);
+ (this->*fn)(zd, zn, zm);
+ }
+}
+
+void MacroAssembler::Fscale(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (zd.Aliases(zm) && !zd.Aliases(zn)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm);
+ Mov(scratch, zm);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fscale(zd, pg, zd, scratch);
+ } else {
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fscale(zd, pg, zd, zm);
+ }
+}
+
+void MacroAssembler::Sdot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVESdotUdotHelper(&Assembler::sdot, zd, za, zn, zm);
+}
+
+void MacroAssembler::Sdot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVESdotUdotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index);
+}
+
+void MacroAssembler::Udot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVESdotUdotHelper(&Assembler::udot, zd, za, zn, zm);
+}
+
+void MacroAssembler::Udot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVESdotUdotIndexHelper(&Assembler::udot, zd, za, zn, zm, index);
+}
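Note that the dot-product macros accumulate into lanes four times as wide as the source lanes; a hedged sketch of the aliasing cases handled above (assumptions as before):

void DotProductSketch(MacroAssembler* masm) {
  masm->Sdot(z0.VnS(), z0.VnS(), z1.VnB(), z2.VnB());     // zd aliases za: sdot.
  masm->Udot(z3.VnS(), z4.VnS(), z5.VnB(), z6.VnB());     // Distinct: movprfx + udot.
  masm->Sdot(z0.VnD(), z1.VnD(), z2.VnH(), z3.VnH(), 1);  // Indexed form, lane 1.
}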
+
+void MacroAssembler::FPMulAddHelper(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ SVEMulAddPredicatedZdaFn fn_zda,
+ SVEMulAddPredicatedZdnFn fn_zdn,
+ FPMacroNaNPropagationOption nan_option) {
+ ResolveFPNaNPropagationOption(&nan_option);
+
+ if (zd.Aliases(za)) {
+ // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
+ SingleEmissionCheckScope guard(this);
+ (this->*fn_zda)(zd, pg, zn, zm);
+ } else if (zd.Aliases(zn)) {
+ // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb.
+ SingleEmissionCheckScope guard(this);
+ (this->*fn_zdn)(zd, pg, zm, za);
+ } else if (zd.Aliases(zm)) {
+ switch (nan_option) {
+ case FastNaNPropagation: {
+ // We treat multiplication as commutative in the fast mode, so we can
+ // swap zn and zm.
+ // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb.
+ SingleEmissionCheckScope guard(this);
+ (this->*fn_zdn)(zd, pg, zn, za);
+ return;
+ }
+ case StrictNaNPropagation: {
+ UseScratchRegisterScope temps(this);
+ // Use a scratch register to keep the argument order exactly as
+ // specified.
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
+ {
+ MovprfxHelperScope guard(this, scratch, pg, za);
+ // scratch = (-)za + ((-)zn * zm)
+ (this->*fn_zda)(scratch, pg, zn, zm);
+ }
+ Mov(zd, scratch);
+ return;
+ }
+ case NoFPMacroNaNPropagationSelected:
+ VIXL_UNREACHABLE();
+ return;
+ }
+ } else {
+ // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
+ MovprfxHelperScope guard(this, zd, pg, za);
+ (this->*fn_zda)(zd, pg, zn, zm);
+ }
+}
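From the caller's side, the NaN-propagation option above only changes behaviour when zd aliases zm (assumptions as before):

void FmlaSketch(MacroAssembler* masm) {
  // Fast mode: the multiplicands are swapped and fmad is used.
  masm->Fmla(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS(), z0.VnS(),
             FastNaNPropagation);
  // Strict mode: a scratch register preserves the written operand order.
  masm->Fmla(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS(), z0.VnS(),
             StrictNaNPropagation);
}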
+
+void MacroAssembler::FPMulAddIndexHelper(SVEMulAddIndexFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ if (zd.Aliases(za)) {
+ // zda = zda + (zn * zm[i])
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zd, zn, zm, index);
+
+ } else if (zd.Aliases(zn) || zd.Aliases(zm)) {
+ // zdn = za + (zdn * zm[i])
+ // zdm = za + (zn * zdm[i])
+ // zdnm = za + (zdnm * zdnm[i])
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, scratch, za);
+ (this->*fn)(scratch, zn, zm, index);
+ }
+ Mov(zd, scratch);
+ } else {
+ // zd = za + (zn * zm[i])
+ MovprfxHelperScope guard(this, zd, za);
+ (this->*fn)(zd, zn, zm, index);
+ }
+}
+
+void MacroAssembler::Fmla(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPMulAddHelper(zd,
+ pg,
+ za,
+ zn,
+ zm,
+ &Assembler::fmla,
+ &Assembler::fmad,
+ nan_option);
+}
+
+void MacroAssembler::Fmla(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPMulAddIndexHelper(&Assembler::fmla, zd, za, zn, zm, index);
+}
+
+void MacroAssembler::Fmls(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPMulAddHelper(zd,
+ pg,
+ za,
+ zn,
+ zm,
+ &Assembler::fmls,
+ &Assembler::fmsb,
+ nan_option);
+}
+
+void MacroAssembler::Fmls(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPMulAddIndexHelper(&Assembler::fmls, zd, za, zn, zm, index);
+}
+
+void MacroAssembler::Fnmla(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPMulAddHelper(zd,
+ pg,
+ za,
+ zn,
+ zm,
+ &Assembler::fnmla,
+ &Assembler::fnmad,
+ nan_option);
+}
+
+void MacroAssembler::Fnmls(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ FPMacroNaNPropagationOption nan_option) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ FPMulAddHelper(zd,
+ pg,
+ za,
+ zn,
+ zm,
+ &Assembler::fnmls,
+ &Assembler::fnmsb,
+ nan_option);
+}
+
+void MacroAssembler::Ftmad(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int imm3) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (zd.Aliases(zm) && !zd.Aliases(zn)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm);
+ Mov(scratch, zm);
+ MovprfxHelperScope guard(this, zd, zn);
+ ftmad(zd, zd, scratch, imm3);
+ } else {
+ MovprfxHelperScope guard(this, zd, zn);
+ ftmad(zd, zd, zm, imm3);
+ }
+}
+
+void MacroAssembler::Fcadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (zd.Aliases(zm) && !zd.Aliases(zn)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, scratch, pg, zn);
+ fcadd(scratch, pg, scratch, zm, rot);
+ }
+ Mov(zd, scratch);
+ } else {
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ fcadd(zd, pg, zd, zm, rot);
+ }
+}
+
+void MacroAssembler::Ext(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ unsigned offset) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (zd.Aliases(zm) && !zd.Aliases(zn)) {
+ // zd = ext(zn, zd, offset)
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, scratch, zn);
+ ext(scratch, scratch, zm, offset);
+ }
+ Mov(zd, scratch);
+ } else {
+ // zd = ext(zn, zm, offset)
+ // zd = ext(zd, zd, offset)
+ MovprfxHelperScope guard(this, zd, zn);
+ ext(zd, zd, zm, offset);
+ }
+}
+
+void MacroAssembler::Splice(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (zd.Aliases(zm) && !zd.Aliases(zn)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, scratch, zn);
+ splice(scratch, pg, scratch, zm);
+ }
+ Mov(zd, scratch);
+ } else {
+ MovprfxHelperScope guard(this, zd, zn);
+ splice(zd, pg, zd, zm);
+ }
+}
+
+void MacroAssembler::Clasta(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (zd.Aliases(zm) && !zd.Aliases(zn)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, scratch, zn);
+ clasta(scratch, pg, scratch, zm);
+ }
+ Mov(zd, scratch);
+ } else {
+ MovprfxHelperScope guard(this, zd, zn);
+ clasta(zd, pg, zd, zm);
+ }
+}
+
+void MacroAssembler::Clastb(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (zd.Aliases(zm) && !zd.Aliases(zn)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, scratch, zn);
+ clastb(scratch, pg, scratch, zm);
+ }
+ Mov(zd, scratch);
+ } else {
+ MovprfxHelperScope guard(this, zd, zn);
+ clastb(zd, pg, zd, zm);
+ }
+}
+
+} // namespace aarch64
+} // namespace vixl
diff --git a/src/aarch64/operands-aarch64.cc b/src/aarch64/operands-aarch64.cc
index 20364616..008179e4 100644
--- a/src/aarch64/operands-aarch64.cc
+++ b/src/aarch64/operands-aarch64.cc
@@ -30,32 +30,32 @@ namespace vixl {
namespace aarch64 {
// CPURegList utilities.
-CPURegister CPURegList::PopLowestIndex() {
- if (IsEmpty()) {
- return NoCPUReg;
- }
- int index = CountTrailingZeros(list_);
- VIXL_ASSERT((1 << index) & list_);
+CPURegister CPURegList::PopLowestIndex(RegList mask) {
+ RegList list = list_ & mask;
+ if (list == 0) return NoCPUReg;
+ int index = CountTrailingZeros(list);
+ VIXL_ASSERT(((1 << index) & list) != 0);
Remove(index);
return CPURegister(index, size_, type_);
}
-CPURegister CPURegList::PopHighestIndex() {
- VIXL_ASSERT(IsValid());
- if (IsEmpty()) {
- return NoCPUReg;
- }
- int index = CountLeadingZeros(list_);
+CPURegister CPURegList::PopHighestIndex(RegList mask) {
+ RegList list = list_ & mask;
+ if (list == 0) return NoCPUReg;
+ int index = CountLeadingZeros(list);
index = kRegListSizeInBits - 1 - index;
- VIXL_ASSERT((1 << index) & list_);
+ VIXL_ASSERT(((1 << index) & list) != 0);
Remove(index);
return CPURegister(index, size_, type_);
}
bool CPURegList::IsValid() const {
- if ((type_ == CPURegister::kRegister) || (type_ == CPURegister::kVRegister)) {
+ if (type_ == CPURegister::kNoRegister) {
+ // We can't use IsEmpty here because that asserts IsValid().
+ return list_ == 0;
+ } else {
bool is_valid = true;
// Try to create a CPURegister for each element in the list.
for (int i = 0; i < kRegListSizeInBits; i++) {
@@ -64,11 +64,6 @@ bool CPURegList::IsValid() const {
}
}
return is_valid;
- } else if (type_ == CPURegister::kNoRegister) {
- // We can't use IsEmpty here because that asserts IsValid().
- return list_ == 0;
- } else {
- return false;
}
}
@@ -149,145 +144,6 @@ const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV();
const CPURegList kCallerSaved = CPURegList::GetCallerSaved();
const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV();
-
-// Registers.
-#define WREG(n) w##n,
-const Register Register::wregisters[] = {AARCH64_REGISTER_CODE_LIST(WREG)};
-#undef WREG
-
-#define XREG(n) x##n,
-const Register Register::xregisters[] = {AARCH64_REGISTER_CODE_LIST(XREG)};
-#undef XREG
-
-#define BREG(n) b##n,
-const VRegister VRegister::bregisters[] = {AARCH64_REGISTER_CODE_LIST(BREG)};
-#undef BREG
-
-#define HREG(n) h##n,
-const VRegister VRegister::hregisters[] = {AARCH64_REGISTER_CODE_LIST(HREG)};
-#undef HREG
-
-#define SREG(n) s##n,
-const VRegister VRegister::sregisters[] = {AARCH64_REGISTER_CODE_LIST(SREG)};
-#undef SREG
-
-#define DREG(n) d##n,
-const VRegister VRegister::dregisters[] = {AARCH64_REGISTER_CODE_LIST(DREG)};
-#undef DREG
-
-#define QREG(n) q##n,
-const VRegister VRegister::qregisters[] = {AARCH64_REGISTER_CODE_LIST(QREG)};
-#undef QREG
-
-#define VREG(n) v##n,
-const VRegister VRegister::vregisters[] = {AARCH64_REGISTER_CODE_LIST(VREG)};
-#undef VREG
-
-
-const Register& Register::GetWRegFromCode(unsigned code) {
- if (code == kSPRegInternalCode) {
- return wsp;
- } else {
- VIXL_ASSERT(code < kNumberOfRegisters);
- return wregisters[code];
- }
-}
-
-
-const Register& Register::GetXRegFromCode(unsigned code) {
- if (code == kSPRegInternalCode) {
- return sp;
- } else {
- VIXL_ASSERT(code < kNumberOfRegisters);
- return xregisters[code];
- }
-}
-
-
-const VRegister& VRegister::GetBRegFromCode(unsigned code) {
- VIXL_ASSERT(code < kNumberOfVRegisters);
- return bregisters[code];
-}
-
-
-const VRegister& VRegister::GetHRegFromCode(unsigned code) {
- VIXL_ASSERT(code < kNumberOfVRegisters);
- return hregisters[code];
-}
-
-
-const VRegister& VRegister::GetSRegFromCode(unsigned code) {
- VIXL_ASSERT(code < kNumberOfVRegisters);
- return sregisters[code];
-}
-
-
-const VRegister& VRegister::GetDRegFromCode(unsigned code) {
- VIXL_ASSERT(code < kNumberOfVRegisters);
- return dregisters[code];
-}
-
-
-const VRegister& VRegister::GetQRegFromCode(unsigned code) {
- VIXL_ASSERT(code < kNumberOfVRegisters);
- return qregisters[code];
-}
-
-
-const VRegister& VRegister::GetVRegFromCode(unsigned code) {
- VIXL_ASSERT(code < kNumberOfVRegisters);
- return vregisters[code];
-}
-
-
-const Register& CPURegister::W() const {
- VIXL_ASSERT(IsValidRegister());
- return Register::GetWRegFromCode(code_);
-}
-
-
-const Register& CPURegister::X() const {
- VIXL_ASSERT(IsValidRegister());
- return Register::GetXRegFromCode(code_);
-}
-
-
-const VRegister& CPURegister::B() const {
- VIXL_ASSERT(IsValidVRegister());
- return VRegister::GetBRegFromCode(code_);
-}
-
-
-const VRegister& CPURegister::H() const {
- VIXL_ASSERT(IsValidVRegister());
- return VRegister::GetHRegFromCode(code_);
-}
-
-
-const VRegister& CPURegister::S() const {
- VIXL_ASSERT(IsValidVRegister());
- return VRegister::GetSRegFromCode(code_);
-}
-
-
-const VRegister& CPURegister::D() const {
- VIXL_ASSERT(IsValidVRegister());
- return VRegister::GetDRegFromCode(code_);
-}
-
-
-const VRegister& CPURegister::Q() const {
- VIXL_ASSERT(IsValidVRegister());
- return VRegister::GetQRegFromCode(code_);
-}
-
-
-const VRegister& CPURegister::V() const {
- VIXL_ASSERT(IsValidVRegister());
- return VRegister::GetVRegFromCode(code_);
-}
-
-
// Operand.
Operand::Operand(int64_t immediate)
: immediate_(immediate),
@@ -296,6 +152,12 @@ Operand::Operand(int64_t immediate)
extend_(NO_EXTEND),
shift_amount_(0) {}
+Operand::Operand(IntegerOperand immediate)
+ : immediate_(immediate.AsIntN(64)),
+ reg_(NoReg),
+ shift_(NO_SHIFT),
+ extend_(NO_EXTEND),
+ shift_amount_(0) {}
Operand::Operand(Register reg, Shift shift, unsigned shift_amount)
: reg_(reg),
@@ -471,6 +333,24 @@ MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode)
}
+bool MemOperand::IsPlainRegister() const {
+ return IsImmediateOffset() && (GetOffset() == 0);
+}
+
+
+bool MemOperand::IsEquivalentToPlainRegister() const {
+ if (regoffset_.Is(NoReg)) {
+ // Immediate offset, pre-index or post-index.
+ return GetOffset() == 0;
+ } else if (GetRegisterOffset().IsZero()) {
+ // Zero register offset, pre-index or post-index.
+ // We can ignore shift and extend options because they all result in zero.
+ return true;
+ }
+ return false;
+}
+
+
bool MemOperand::IsImmediateOffset() const {
return (addrmode_ == Offset) && regoffset_.Is(NoReg);
}
@@ -493,6 +373,62 @@ void MemOperand::AddOffset(int64_t offset) {
}
+bool SVEMemOperand::IsValid() const {
+#ifdef VIXL_DEBUG
+ {
+ // It should not be possible for an SVEMemOperand to match multiple types.
+ int count = 0;
+ if (IsScalarPlusImmediate()) count++;
+ if (IsScalarPlusScalar()) count++;
+ if (IsScalarPlusVector()) count++;
+ if (IsVectorPlusImmediate()) count++;
+ if (IsVectorPlusVector()) count++;
+ VIXL_ASSERT(count <= 1);
+ }
+#endif
+
+ // We can't have a register _and_ an immediate offset.
+ if ((offset_ != 0) && (!regoffset_.IsNone())) return false;
+
+ if (shift_amount_ != 0) {
+ // Only shift and extend modifiers can take a shift amount.
+ switch (mod_) {
+ case NO_SVE_OFFSET_MODIFIER:
+ case SVE_MUL_VL:
+ return false;
+ case SVE_LSL:
+ case SVE_UXTW:
+ case SVE_SXTW:
+ // Fall through.
+ break;
+ }
+ }
+
+ return IsScalarPlusImmediate() || IsScalarPlusScalar() ||
+ IsScalarPlusVector() || IsVectorPlusImmediate() ||
+ IsVectorPlusVector();
+}
+
+
+bool SVEMemOperand::IsEquivalentToScalar() const {
+ if (IsScalarPlusImmediate()) {
+ return GetImmediateOffset() == 0;
+ }
+ if (IsScalarPlusScalar()) {
+ // We can ignore the shift because it will still result in zero.
+ return GetScalarOffset().IsZero();
+ }
+ // Forms involving vectors are never equivalent to a single scalar.
+ return false;
+}
+
+bool SVEMemOperand::IsPlainRegister() const {
+ if (IsScalarPlusImmediate()) {
+ return GetImmediateOffset() == 0;
+ }
+ return false;
+}
+
GenericOperand::GenericOperand(const CPURegister& reg)
: cpu_register_(reg), mem_op_size_(0) {
if (reg.IsQ()) {
diff --git a/src/aarch64/operands-aarch64.h b/src/aarch64/operands-aarch64.h
index bfc6b702..ad03a9ee 100644
--- a/src/aarch64/operands-aarch64.h
+++ b/src/aarch64/operands-aarch64.h
@@ -27,525 +27,15 @@
#ifndef VIXL_AARCH64_OPERANDS_AARCH64_H_
#define VIXL_AARCH64_OPERANDS_AARCH64_H_
+#include <sstream>
+#include <string>
+
#include "instructions-aarch64.h"
+#include "registers-aarch64.h"
namespace vixl {
namespace aarch64 {
-typedef uint64_t RegList;
-static const int kRegListSizeInBits = sizeof(RegList) * 8;
-
-
-// Registers.
-
-// Some CPURegister methods can return Register or VRegister types, so we need
-// to declare them in advance.
-class Register;
-class VRegister;
-
-class CPURegister {
- public:
- enum RegisterType {
- // The kInvalid value is used to detect uninitialized static instances,
- // which are always zero-initialized before any constructors are called.
- kInvalid = 0,
- kRegister,
- kVRegister,
- kNoRegister
- };
-
- CPURegister() : code_(0), size_(0), type_(kNoRegister) {
- VIXL_ASSERT(!IsValid());
- VIXL_ASSERT(IsNone());
- }
-
- CPURegister(unsigned code, unsigned size, RegisterType type)
- : code_(code), size_(size), type_(type) {
- VIXL_ASSERT(IsValidOrNone());
- }
-
- unsigned GetCode() const {
- VIXL_ASSERT(IsValid());
- return code_;
- }
- VIXL_DEPRECATED("GetCode", unsigned code() const) { return GetCode(); }
-
- RegisterType GetType() const {
- VIXL_ASSERT(IsValidOrNone());
- return type_;
- }
- VIXL_DEPRECATED("GetType", RegisterType type() const) { return GetType(); }
-
- RegList GetBit() const {
- VIXL_ASSERT(code_ < (sizeof(RegList) * 8));
- return IsValid() ? (static_cast<RegList>(1) << code_) : 0;
- }
- VIXL_DEPRECATED("GetBit", RegList Bit() const) { return GetBit(); }
-
- int GetSizeInBytes() const {
- VIXL_ASSERT(IsValid());
- VIXL_ASSERT(size_ % 8 == 0);
- return size_ / 8;
- }
- VIXL_DEPRECATED("GetSizeInBytes", int SizeInBytes() const) {
- return GetSizeInBytes();
- }
-
- int GetSizeInBits() const {
- VIXL_ASSERT(IsValid());
- return size_;
- }
- VIXL_DEPRECATED("GetSizeInBits", unsigned size() const) {
- return GetSizeInBits();
- }
- VIXL_DEPRECATED("GetSizeInBits", int SizeInBits() const) {
- return GetSizeInBits();
- }
-
- bool Is8Bits() const {
- VIXL_ASSERT(IsValid());
- return size_ == 8;
- }
-
- bool Is16Bits() const {
- VIXL_ASSERT(IsValid());
- return size_ == 16;
- }
-
- bool Is32Bits() const {
- VIXL_ASSERT(IsValid());
- return size_ == 32;
- }
-
- bool Is64Bits() const {
- VIXL_ASSERT(IsValid());
- return size_ == 64;
- }
-
- bool Is128Bits() const {
- VIXL_ASSERT(IsValid());
- return size_ == 128;
- }
-
- bool IsValid() const {
- if (IsValidRegister() || IsValidVRegister()) {
- VIXL_ASSERT(!IsNone());
- return true;
- } else {
- // This assert is hit when the register has not been properly initialized.
- // One cause for this can be an initialisation order fiasco. See
- // https://isocpp.org/wiki/faq/ctors#static-init-order for some details.
- VIXL_ASSERT(IsNone());
- return false;
- }
- }
-
- bool IsValidRegister() const {
- return IsRegister() && ((size_ == kWRegSize) || (size_ == kXRegSize)) &&
- ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode));
- }
-
- bool IsValidVRegister() const {
- return IsVRegister() && ((size_ == kBRegSize) || (size_ == kHRegSize) ||
- (size_ == kSRegSize) || (size_ == kDRegSize) ||
- (size_ == kQRegSize)) &&
- (code_ < kNumberOfVRegisters);
- }
-
- bool IsValidFPRegister() const {
- return IsValidVRegister() && IsFPRegister();
- }
-
- bool IsNone() const {
- // kNoRegister types should always have size 0 and code 0.
- VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0));
- VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0));
-
- return type_ == kNoRegister;
- }
-
- bool Aliases(const CPURegister& other) const {
- VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
- return (code_ == other.code_) && (type_ == other.type_);
- }
-
- bool Is(const CPURegister& other) const {
- VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
- return Aliases(other) && (size_ == other.size_);
- }
-
- bool IsZero() const {
- VIXL_ASSERT(IsValid());
- return IsRegister() && (code_ == kZeroRegCode);
- }
-
- bool IsSP() const {
- VIXL_ASSERT(IsValid());
- return IsRegister() && (code_ == kSPRegInternalCode);
- }
-
- bool IsRegister() const { return type_ == kRegister; }
-
- bool IsVRegister() const { return type_ == kVRegister; }
-
- // CPURegister does not track lanes like VRegister does, so we have to assume
- // that we have scalar types here.
- // TODO: Encode lane information in CPURegister so that we can be consistent.
- bool IsFPRegister() const { return IsH() || IsS() || IsD(); }
-
- bool IsW() const { return IsValidRegister() && Is32Bits(); }
- bool IsX() const { return IsValidRegister() && Is64Bits(); }
-
- // These assertions ensure that the size and type of the register are as
- // described. They do not consider the number of lanes that make up a vector.
- // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD()
- // does not imply Is1D() or Is8B().
- // Check the number of lanes, ie. the format of the vector, using methods such
- // as Is8B(), Is1D(), etc. in the VRegister class.
- bool IsV() const { return IsVRegister(); }
- bool IsB() const { return IsV() && Is8Bits(); }
- bool IsH() const { return IsV() && Is16Bits(); }
- bool IsS() const { return IsV() && Is32Bits(); }
- bool IsD() const { return IsV() && Is64Bits(); }
- bool IsQ() const { return IsV() && Is128Bits(); }
-
- // Semantic type for sdot and udot instructions.
- bool IsS4B() const { return IsS(); }
- const VRegister& S4B() const { return S(); }
-
- const Register& W() const;
- const Register& X() const;
- const VRegister& V() const;
- const VRegister& B() const;
- const VRegister& H() const;
- const VRegister& S() const;
- const VRegister& D() const;
- const VRegister& Q() const;
-
- bool IsSameType(const CPURegister& other) const {
- return type_ == other.type_;
- }
-
- bool IsSameSizeAndType(const CPURegister& other) const {
- return (size_ == other.size_) && IsSameType(other);
- }
-
- protected:
- unsigned code_;
- int size_;
- RegisterType type_;
-
- private:
- bool IsValidOrNone() const { return IsValid() || IsNone(); }
-};
-
-
-class Register : public CPURegister {
- public:
- Register() : CPURegister() {}
- explicit Register(const CPURegister& other)
- : CPURegister(other.GetCode(), other.GetSizeInBits(), other.GetType()) {
- VIXL_ASSERT(IsValidRegister());
- }
- Register(unsigned code, unsigned size) : CPURegister(code, size, kRegister) {}
-
- bool IsValid() const {
- VIXL_ASSERT(IsRegister() || IsNone());
- return IsValidRegister();
- }
-
- static const Register& GetWRegFromCode(unsigned code);
- VIXL_DEPRECATED("GetWRegFromCode",
- static const Register& WRegFromCode(unsigned code)) {
- return GetWRegFromCode(code);
- }
-
- static const Register& GetXRegFromCode(unsigned code);
- VIXL_DEPRECATED("GetXRegFromCode",
- static const Register& XRegFromCode(unsigned code)) {
- return GetXRegFromCode(code);
- }
-
- private:
- static const Register wregisters[];
- static const Register xregisters[];
-};
-
-
-namespace internal {
-
-template <int size_in_bits>
-class FixedSizeRegister : public Register {
- public:
- FixedSizeRegister() : Register() {}
- explicit FixedSizeRegister(unsigned code) : Register(code, size_in_bits) {
- VIXL_ASSERT(IsValidRegister());
- }
- explicit FixedSizeRegister(const Register& other)
- : Register(other.GetCode(), size_in_bits) {
- VIXL_ASSERT(other.GetSizeInBits() == size_in_bits);
- VIXL_ASSERT(IsValidRegister());
- }
- explicit FixedSizeRegister(const CPURegister& other)
- : Register(other.GetCode(), other.GetSizeInBits()) {
- VIXL_ASSERT(other.GetType() == kRegister);
- VIXL_ASSERT(other.GetSizeInBits() == size_in_bits);
- VIXL_ASSERT(IsValidRegister());
- }
-
- bool IsValid() const {
- return Register::IsValid() && (GetSizeInBits() == size_in_bits);
- }
-};
-
-} // namespace internal
-
-typedef internal::FixedSizeRegister<kXRegSize> XRegister;
-typedef internal::FixedSizeRegister<kWRegSize> WRegister;
-
-
-class VRegister : public CPURegister {
- public:
- VRegister() : CPURegister(), lanes_(1) {}
- explicit VRegister(const CPURegister& other)
- : CPURegister(other.GetCode(), other.GetSizeInBits(), other.GetType()),
- lanes_(1) {
- VIXL_ASSERT(IsValidVRegister());
- VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
- }
- VRegister(unsigned code, unsigned size, unsigned lanes = 1)
- : CPURegister(code, size, kVRegister), lanes_(lanes) {
- VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
- }
- VRegister(unsigned code, VectorFormat format)
- : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister),
- lanes_(IsVectorFormat(format) ? LaneCountFromFormat(format) : 1) {
- VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
- }
-
- bool IsValid() const {
- VIXL_ASSERT(IsVRegister() || IsNone());
- return IsValidVRegister();
- }
-
- static const VRegister& GetBRegFromCode(unsigned code);
- VIXL_DEPRECATED("GetBRegFromCode",
- static const VRegister& BRegFromCode(unsigned code)) {
- return GetBRegFromCode(code);
- }
-
- static const VRegister& GetHRegFromCode(unsigned code);
- VIXL_DEPRECATED("GetHRegFromCode",
- static const VRegister& HRegFromCode(unsigned code)) {
- return GetHRegFromCode(code);
- }
-
- static const VRegister& GetSRegFromCode(unsigned code);
- VIXL_DEPRECATED("GetSRegFromCode",
- static const VRegister& SRegFromCode(unsigned code)) {
- return GetSRegFromCode(code);
- }
-
- static const VRegister& GetDRegFromCode(unsigned code);
- VIXL_DEPRECATED("GetDRegFromCode",
- static const VRegister& DRegFromCode(unsigned code)) {
- return GetDRegFromCode(code);
- }
-
- static const VRegister& GetQRegFromCode(unsigned code);
- VIXL_DEPRECATED("GetQRegFromCode",
- static const VRegister& QRegFromCode(unsigned code)) {
- return GetQRegFromCode(code);
- }
-
- static const VRegister& GetVRegFromCode(unsigned code);
- VIXL_DEPRECATED("GetVRegFromCode",
- static const VRegister& VRegFromCode(unsigned code)) {
- return GetVRegFromCode(code);
- }
-
- VRegister V8B() const { return VRegister(code_, kDRegSize, 8); }
- VRegister V16B() const { return VRegister(code_, kQRegSize, 16); }
- VRegister V2H() const { return VRegister(code_, kSRegSize, 2); }
- VRegister V4H() const { return VRegister(code_, kDRegSize, 4); }
- VRegister V8H() const { return VRegister(code_, kQRegSize, 8); }
- VRegister V2S() const { return VRegister(code_, kDRegSize, 2); }
- VRegister V4S() const { return VRegister(code_, kQRegSize, 4); }
- VRegister V2D() const { return VRegister(code_, kQRegSize, 2); }
- VRegister V1D() const { return VRegister(code_, kDRegSize, 1); }
-
- bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); }
- bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); }
- bool Is2H() const { return (Is32Bits() && (lanes_ == 2)); }
- bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); }
- bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); }
- bool Is1S() const { return (Is32Bits() && (lanes_ == 1)); }
- bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); }
- bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); }
- bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); }
- bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); }
-
- // For consistency, we assert the number of lanes of these scalar registers,
- // even though there are no vectors of equivalent total size with which they
- // could alias.
- bool Is1B() const {
- VIXL_ASSERT(!(Is8Bits() && IsVector()));
- return Is8Bits();
- }
- bool Is1H() const {
- VIXL_ASSERT(!(Is16Bits() && IsVector()));
- return Is16Bits();
- }
-
- // Semantic type for sdot and udot instructions.
- bool Is1S4B() const { return Is1S(); }
-
-
- bool IsLaneSizeB() const { return GetLaneSizeInBits() == kBRegSize; }
- bool IsLaneSizeH() const { return GetLaneSizeInBits() == kHRegSize; }
- bool IsLaneSizeS() const { return GetLaneSizeInBits() == kSRegSize; }
- bool IsLaneSizeD() const { return GetLaneSizeInBits() == kDRegSize; }
-
- int GetLanes() const { return lanes_; }
- VIXL_DEPRECATED("GetLanes", int lanes() const) { return GetLanes(); }
-
- bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); }
- bool IsValidFPRegister() const {
- return IsValidVRegister() && IsFPRegister();
- }
-
- bool IsScalar() const { return lanes_ == 1; }
-
- bool IsVector() const { return lanes_ > 1; }
-
- bool IsSameFormat(const VRegister& other) const {
- return (size_ == other.size_) && (lanes_ == other.lanes_);
- }
-
- unsigned GetLaneSizeInBytes() const { return GetSizeInBytes() / lanes_; }
- VIXL_DEPRECATED("GetLaneSizeInBytes", unsigned LaneSizeInBytes() const) {
- return GetLaneSizeInBytes();
- }
-
- unsigned GetLaneSizeInBits() const { return GetLaneSizeInBytes() * 8; }
- VIXL_DEPRECATED("GetLaneSizeInBits", unsigned LaneSizeInBits() const) {
- return GetLaneSizeInBits();
- }
-
- private:
- static const VRegister bregisters[];
- static const VRegister hregisters[];
- static const VRegister sregisters[];
- static const VRegister dregisters[];
- static const VRegister qregisters[];
- static const VRegister vregisters[];
- int lanes_;
-};
-
-
-// No*Reg is used to indicate an unused argument, or an error case. Note that
-// these all compare equal (using the Is() method). The Register and VRegister
-// variants are provided for convenience.
-const Register NoReg;
-const VRegister NoVReg;
-const CPURegister NoCPUReg;
-
-
-#define DEFINE_REGISTERS(N) \
- const WRegister w##N(N); \
- const XRegister x##N(N);
-AARCH64_REGISTER_CODE_LIST(DEFINE_REGISTERS)
-#undef DEFINE_REGISTERS
-const WRegister wsp(kSPRegInternalCode);
-const XRegister sp(kSPRegInternalCode);
-
-
-#define DEFINE_VREGISTERS(N) \
- const VRegister b##N(N, kBRegSize); \
- const VRegister h##N(N, kHRegSize); \
- const VRegister s##N(N, kSRegSize); \
- const VRegister d##N(N, kDRegSize); \
- const VRegister q##N(N, kQRegSize); \
- const VRegister v##N(N, kQRegSize);
-AARCH64_REGISTER_CODE_LIST(DEFINE_VREGISTERS)
-#undef DEFINE_VREGISTERS
-
-
-// Register aliases.
-const XRegister ip0 = x16;
-const XRegister ip1 = x17;
-const XRegister lr = x30;
-const XRegister xzr = x31;
-const WRegister wzr = w31;
-
-
-// AreAliased returns true if any of the named registers overlap. Arguments
-// set to NoReg are ignored. The system stack pointer may be specified.
-bool AreAliased(const CPURegister& reg1,
- const CPURegister& reg2,
- const CPURegister& reg3 = NoReg,
- const CPURegister& reg4 = NoReg,
- const CPURegister& reg5 = NoReg,
- const CPURegister& reg6 = NoReg,
- const CPURegister& reg7 = NoReg,
- const CPURegister& reg8 = NoReg);
-
-
-// AreSameSizeAndType returns true if all of the specified registers have the
-// same size, and are of the same type. The system stack pointer may be
-// specified. Arguments set to NoReg are ignored, as are any subsequent
-// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
-bool AreSameSizeAndType(const CPURegister& reg1,
- const CPURegister& reg2,
- const CPURegister& reg3 = NoCPUReg,
- const CPURegister& reg4 = NoCPUReg,
- const CPURegister& reg5 = NoCPUReg,
- const CPURegister& reg6 = NoCPUReg,
- const CPURegister& reg7 = NoCPUReg,
- const CPURegister& reg8 = NoCPUReg);
-
-// AreEven returns true if all of the specified registers have even register
-// indices. Arguments set to NoReg are ignored, as are any subsequent
-// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
-bool AreEven(const CPURegister& reg1,
- const CPURegister& reg2,
- const CPURegister& reg3 = NoReg,
- const CPURegister& reg4 = NoReg,
- const CPURegister& reg5 = NoReg,
- const CPURegister& reg6 = NoReg,
- const CPURegister& reg7 = NoReg,
- const CPURegister& reg8 = NoReg);
-
-
-// AreConsecutive returns true if all of the specified registers are
-// consecutive in the register file. Arguments set to NoReg are ignored, as are
-// any subsequent arguments. At least one argument (reg1) must be valid
-// (not NoCPUReg).
-bool AreConsecutive(const CPURegister& reg1,
- const CPURegister& reg2,
- const CPURegister& reg3 = NoCPUReg,
- const CPURegister& reg4 = NoCPUReg);
-
-
-// AreSameFormat returns true if all of the specified VRegisters have the same
-// vector format. Arguments set to NoReg are ignored, as are any subsequent
-// arguments. At least one argument (reg1) must be valid (not NoVReg).
-bool AreSameFormat(const VRegister& reg1,
- const VRegister& reg2,
- const VRegister& reg3 = NoVReg,
- const VRegister& reg4 = NoVReg);
-
-
-// AreConsecutive returns true if all of the specified VRegisters are
-// consecutive in the register file. Arguments set to NoReg are ignored, as are
-// any subsequent arguments. At least one argument (reg1) must be valid
-// (not NoVReg).
-bool AreConsecutive(const VRegister& reg1,
- const VRegister& reg2,
- const VRegister& reg3 = NoVReg,
- const VRegister& reg4 = NoVReg);
-
-
// Lists of registers.
class CPURegList {
public:
@@ -580,6 +70,28 @@ class CPURegList {
VIXL_ASSERT(IsValid());
}
+ // Construct an empty CPURegList with the specified size and type. If `size`
+ // is CPURegister::kUnknownSize and the register type requires a size, a valid
+ // but unspecified default will be picked.
+ static CPURegList Empty(CPURegister::RegisterType type,
+ unsigned size = CPURegister::kUnknownSize) {
+ return CPURegList(type, GetDefaultSizeFor(type, size), 0);
+ }
+
+ // Construct a CPURegList with all possible registers with the specified size
+ // and type. If `size` is CPURegister::kUnknownSize and the register type
+ // requires a size, a valid but unspecified default will be picked.
+ static CPURegList All(CPURegister::RegisterType type,
+ unsigned size = CPURegister::kUnknownSize) {
+ unsigned number_of_registers = (CPURegister::GetMaxCodeFor(type) + 1);
+ RegList list = (static_cast<RegList>(1) << number_of_registers) - 1;
+ if (type == CPURegister::kRegister) {
+ // GetMaxCodeFor(kRegister) ignores SP, so explicitly include it.
+ list |= (static_cast<RegList>(1) << kSPRegInternalCode);
+ }
+ return CPURegList(type, GetDefaultSizeFor(type, size), list);
+ }
+
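// Illustrative sketch (hypothetical usage, not part of this patch): building
// lists with the new factory helpers. For CPURegister::kUnknownSize the chosen
// default size is valid but unspecified, so only properties guaranteed by the
// comments above are checked.
#include "aarch64/operands-aarch64.h"
using namespace vixl::aarch64;

void CPURegListFactorySketch() {
  CPURegList empty = CPURegList::Empty(CPURegister::kRegister);
  VIXL_ASSERT(empty.IsEmpty());

  // Every core register, including sp (see the comment in All()).
  CPURegList all_x = CPURegList::All(CPURegister::kRegister, kXRegSize);
  VIXL_ASSERT(all_x.IncludesAliasOf(x0) && all_x.IncludesAliasOf(sp));

  // Every V register, with a valid but unspecified default size.
  CPURegList all_v = CPURegList::All(CPURegister::kVRegister);
  VIXL_ASSERT(all_v.IncludesAliasOf(q31));
}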
CPURegister::RegisterType GetType() const {
VIXL_ASSERT(IsValid());
return type_;
@@ -588,6 +100,10 @@ class CPURegList {
return GetType();
}
+ CPURegister::RegisterBank GetBank() const {
+ return CPURegister::GetBankFor(GetType());
+ }
+
// Combine another CPURegList into this one. Registers that already exist in
// this list are left unchanged. The type and size of the registers in the
// 'other' list must match those in this list.
@@ -684,8 +200,11 @@ class CPURegList {
// preparing registers for an AAPCS64 function call, for example.
void RemoveCalleeSaved();
- CPURegister PopLowestIndex();
- CPURegister PopHighestIndex();
+ // Find the register in this list that appears in `mask` with the lowest or
+ // highest code, remove it from the list and return it as a CPURegister. If
+ // the list is empty, leave it unchanged and return NoCPUReg.
+ CPURegister PopLowestIndex(RegList mask = ~static_cast<RegList>(0));
+ CPURegister PopHighestIndex(RegList mask = ~static_cast<RegList>(0));
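// Illustrative sketch (hypothetical usage, not part of this patch): popping
// registers from a list, optionally restricted to a mask of register codes.
#include "aarch64/operands-aarch64.h"
using namespace vixl::aarch64;

void PopIndexSketch() {
  CPURegList list(x2, x3, x5);
  // Without a mask, the lowest- (or highest-) numbered register is returned.
  VIXL_ASSERT(list.PopLowestIndex().Is(x2));
  VIXL_ASSERT(list.PopHighestIndex().Is(x5));
  // A mask restricts the candidates to the given register codes.
  VIXL_ASSERT(list.PopLowestIndex(x3.GetBit()).Is(x3));
  // Popping from an empty list leaves it unchanged and returns NoCPUReg.
  VIXL_ASSERT(list.PopLowestIndex().IsNone());
}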
// AAPCS64 callee-saved registers.
static CPURegList GetCalleeSaved(unsigned size = kXRegSize);
@@ -704,7 +223,7 @@ class CPURegList {
bool IncludesAliasOf(const CPURegister& other) const {
VIXL_ASSERT(IsValid());
- return (type_ == other.GetType()) && IncludesAliasOf(other.GetCode());
+ return (GetBank() == other.GetBank()) && IncludesAliasOf(other.GetCode());
}
bool IncludesAliasOf(int code) const {
@@ -744,6 +263,21 @@ class CPURegList {
}
private:
+ // If `size` is CPURegister::kUnknownSize and the type requires a known size,
+ // then return an arbitrary-but-valid size.
+ //
+ // Otherwise, the size is checked for validity and returned unchanged.
+ static unsigned GetDefaultSizeFor(CPURegister::RegisterType type,
+ unsigned size) {
+ if (size == CPURegister::kUnknownSize) {
+ if (type == CPURegister::kRegister) size = kXRegSize;
+ if (type == CPURegister::kVRegister) size = kQRegSize;
+ // All other types require kUnknownSize.
+ }
+ VIXL_ASSERT(CPURegister(0, size, type).IsValid());
+ return size;
+ }
+
RegList list_;
int size_;
CPURegister::RegisterType type_;
@@ -761,6 +295,7 @@ extern const CPURegList kCalleeSavedV;
extern const CPURegList kCallerSaved;
extern const CPURegList kCallerSavedV;
+class IntegerOperand;
// Operand.
class Operand {
@@ -769,7 +304,9 @@ class Operand {
// where <immediate> is int64_t.
// This is allowed to be an implicit constructor because Operand is
// a wrapper class that doesn't normally perform any type conversion.
- Operand(int64_t immediate = 0); // NOLINT(runtime/explicit)
+ Operand(int64_t immediate); // NOLINT(runtime/explicit)
+
+ Operand(IntegerOperand immediate); // NOLINT(runtime/explicit)
// rm, {<shift> #<shift_amount>}
// where <shift> is one of {LSL, LSR, ASR, ROR}.
@@ -883,6 +420,16 @@ class MemOperand {
return shift_amount_;
}
+ // True for MemOperands which represent something like [x0].
+ // Currently, this will also return true for [x0, #0], because MemOperand has
+ // no way to distinguish the two.
+ bool IsPlainRegister() const;
+
+ // True for MemOperands which represent something like [x0], or for compound
+ // MemOperands which are functionally equivalent, such as [x0, #0], [x0, xzr]
+ // or [x0, wzr, UXTW #3].
+ bool IsEquivalentToPlainRegister() const;
+
// True for immediate-offset (but not indexed) MemOperands.
bool IsImmediateOffset() const;
// True for register-offset (but not indexed) MemOperands.
@@ -918,6 +465,448 @@ class MemOperand {
unsigned shift_amount_;
};
+// SVE supports memory operands which don't make sense to the core ISA, such as
+// scatter-gather forms, in which either the base or offset registers are
+// vectors. This class exists to avoid complicating core-ISA code with
+// SVE-specific behaviour.
+//
+// Note that SVE does not support any pre- or post-index modes.
+class SVEMemOperand {
+ public:
+ // "vector-plus-immediate", like [z0.s, #21]
+ explicit SVEMemOperand(ZRegister base, uint64_t offset = 0)
+ : base_(base),
+ regoffset_(NoReg),
+ offset_(RawbitsToInt64(offset)),
+ mod_(NO_SVE_OFFSET_MODIFIER),
+ shift_amount_(0) {
+ VIXL_ASSERT(IsVectorPlusImmediate());
+ VIXL_ASSERT(IsValid());
+ }
+
+ // "scalar-plus-immediate", like [x0], [x0, #42] or [x0, #42, MUL_VL]
+ // The only supported modifiers are NO_SVE_OFFSET_MODIFIER or SVE_MUL_VL.
+ //
+ // Note that VIXL cannot currently distinguish between `SVEMemOperand(x0)` and
+ // `SVEMemOperand(x0, 0)`. This is only significant in scalar-plus-scalar
+ // instructions where xm defaults to xzr. However, users should not rely on
+ // `SVEMemOperand(x0, 0)` being accepted in such cases.
+ explicit SVEMemOperand(Register base,
+ uint64_t offset = 0,
+ SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER)
+ : base_(base),
+ regoffset_(NoReg),
+ offset_(RawbitsToInt64(offset)),
+ mod_(mod),
+ shift_amount_(0) {
+ VIXL_ASSERT(IsScalarPlusImmediate());
+ VIXL_ASSERT(IsValid());
+ }
+
+ // "scalar-plus-scalar", like [x0, x1]
+ // "scalar-plus-vector", like [x0, z1.d]
+ SVEMemOperand(Register base, CPURegister offset)
+ : base_(base),
+ regoffset_(offset),
+ offset_(0),
+ mod_(NO_SVE_OFFSET_MODIFIER),
+ shift_amount_(0) {
+ VIXL_ASSERT(IsScalarPlusScalar() || IsScalarPlusVector());
+ if (offset.IsZero()) VIXL_ASSERT(IsEquivalentToScalar());
+ VIXL_ASSERT(IsValid());
+ }
+
+ // "scalar-plus-vector", like [x0, z1.d, UXTW]
+ // The type of `mod` can be any `SVEOffsetModifier` (other than LSL), or a
+ // corresponding `Extend` value.
+ template <typename M>
+ SVEMemOperand(Register base, ZRegister offset, M mod)
+ : base_(base),
+ regoffset_(offset),
+ offset_(0),
+ mod_(GetSVEOffsetModifierFor(mod)),
+ shift_amount_(0) {
+ VIXL_ASSERT(mod_ != SVE_LSL); // LSL requires an explicit shift amount.
+ VIXL_ASSERT(IsScalarPlusVector());
+ VIXL_ASSERT(IsValid());
+ }
+
+ // "scalar-plus-scalar", like [x0, x1, LSL #1]
+ // "scalar-plus-vector", like [x0, z1.d, LSL #2]
+ // The type of `mod` can be any `SVEOffsetModifier`, or a corresponding
+ // `Shift` or `Extend` value.
+ template <typename M>
+ SVEMemOperand(Register base, CPURegister offset, M mod, unsigned shift_amount)
+ : base_(base),
+ regoffset_(offset),
+ offset_(0),
+ mod_(GetSVEOffsetModifierFor(mod)),
+ shift_amount_(shift_amount) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ // "vector-plus-vector", like [z0.d, z1.d, UXTW]
+ template <typename M = SVEOffsetModifier>
+ SVEMemOperand(ZRegister base,
+ ZRegister offset,
+ M mod = NO_SVE_OFFSET_MODIFIER,
+ unsigned shift_amount = 0)
+ : base_(base),
+ regoffset_(offset),
+ offset_(0),
+ mod_(GetSVEOffsetModifierFor(mod)),
+ shift_amount_(shift_amount) {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(IsVectorPlusVector());
+ }
+
+ // True for SVEMemOperands which represent something like [x0].
+ // This will also return true for [x0, #0], because there is no way
+ // to distinguish the two.
+ bool IsPlainScalar() const {
+ return IsScalarPlusImmediate() && (offset_ == 0);
+ }
+
+ // True for SVEMemOperands which represent something like [x0], or for
+ // compound SVEMemOperands which are functionally equivalent, such as
+ // [x0, #0], [x0, xzr] or [x0, wzr, UXTW #3].
+ bool IsEquivalentToScalar() const;
+
+ // True for SVEMemOperands like [x0], [x0, #0], false for [x0, xzr] and
+ // similar.
+ bool IsPlainRegister() const;
+
+ bool IsScalarPlusImmediate() const {
+ return base_.IsX() && regoffset_.IsNone() &&
+ ((mod_ == NO_SVE_OFFSET_MODIFIER) || IsMulVl());
+ }
+
+ bool IsScalarPlusScalar() const {
+ // SVE offers no extend modes for scalar-plus-scalar, so both registers must
+ // be X registers.
+ return base_.IsX() && regoffset_.IsX() &&
+ ((mod_ == NO_SVE_OFFSET_MODIFIER) || (mod_ == SVE_LSL));
+ }
+
+ bool IsScalarPlusVector() const {
+    // The modifier can be LSL or an extend mode (UXTW or SXTW) here. Unlike
+    // in the core ISA, these extend modes do not imply an S-sized lane, so the
+    // modifier is independent of the lane size. The architecture describes
+ // [US]XTW with a D-sized lane as an "unpacked" offset.
+ return base_.IsX() && regoffset_.IsZRegister() &&
+ (regoffset_.IsLaneSizeS() || regoffset_.IsLaneSizeD()) && !IsMulVl();
+ }
+
+ bool IsVectorPlusImmediate() const {
+ return base_.IsZRegister() &&
+ (base_.IsLaneSizeS() || base_.IsLaneSizeD()) &&
+ regoffset_.IsNone() && (mod_ == NO_SVE_OFFSET_MODIFIER);
+ }
+
+ bool IsVectorPlusVector() const {
+ return base_.IsZRegister() && regoffset_.IsZRegister() && (offset_ == 0) &&
+ AreSameFormat(base_, regoffset_) &&
+ (base_.IsLaneSizeS() || base_.IsLaneSizeD());
+ }
+
+ bool IsContiguous() const { return !IsScatterGather(); }
+ bool IsScatterGather() const {
+ return base_.IsZRegister() || regoffset_.IsZRegister();
+ }
+
+ // TODO: If necessary, add helpers like `HasScalarBase()`.
+
+ Register GetScalarBase() const {
+ VIXL_ASSERT(base_.IsX());
+ return Register(base_);
+ }
+
+ ZRegister GetVectorBase() const {
+ VIXL_ASSERT(base_.IsZRegister());
+ VIXL_ASSERT(base_.HasLaneSize());
+ return ZRegister(base_);
+ }
+
+ Register GetScalarOffset() const {
+ VIXL_ASSERT(regoffset_.IsRegister());
+ return Register(regoffset_);
+ }
+
+ ZRegister GetVectorOffset() const {
+ VIXL_ASSERT(regoffset_.IsZRegister());
+ VIXL_ASSERT(regoffset_.HasLaneSize());
+ return ZRegister(regoffset_);
+ }
+
+ int64_t GetImmediateOffset() const {
+ VIXL_ASSERT(regoffset_.IsNone());
+ return offset_;
+ }
+
+ SVEOffsetModifier GetOffsetModifier() const { return mod_; }
+ unsigned GetShiftAmount() const { return shift_amount_; }
+
+ bool IsEquivalentToLSL(unsigned amount) const {
+ if (shift_amount_ != amount) return false;
+ if (amount == 0) {
+ // No-shift is equivalent to "LSL #0".
+ return ((mod_ == SVE_LSL) || (mod_ == NO_SVE_OFFSET_MODIFIER));
+ }
+ return mod_ == SVE_LSL;
+ }
+
+ bool IsMulVl() const { return mod_ == SVE_MUL_VL; }
+
+ bool IsValid() const;
+
+ private:
+ // Allow standard `Shift` and `Extend` arguments to be used.
+ SVEOffsetModifier GetSVEOffsetModifierFor(Shift shift) {
+ if (shift == LSL) return SVE_LSL;
+ if (shift == NO_SHIFT) return NO_SVE_OFFSET_MODIFIER;
+ // SVE does not accept any other shift.
+ VIXL_UNIMPLEMENTED();
+ return NO_SVE_OFFSET_MODIFIER;
+ }
+
+ SVEOffsetModifier GetSVEOffsetModifierFor(Extend extend = NO_EXTEND) {
+ if (extend == UXTW) return SVE_UXTW;
+ if (extend == SXTW) return SVE_SXTW;
+ if (extend == NO_EXTEND) return NO_SVE_OFFSET_MODIFIER;
+ // SVE does not accept any other extend mode.
+ VIXL_UNIMPLEMENTED();
+ return NO_SVE_OFFSET_MODIFIER;
+ }
+
+ SVEOffsetModifier GetSVEOffsetModifierFor(SVEOffsetModifier mod) {
+ return mod;
+ }
+
+ CPURegister base_;
+ CPURegister regoffset_;
+ int64_t offset_;
+ SVEOffsetModifier mod_;
+ unsigned shift_amount_;
+};
+
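// Illustrative sketch (hypothetical usage, not part of this patch): the
// addressing forms an SVEMemOperand can represent, assuming the usual x/z
// register constants and the ZRegister::VnS()/VnD() lane-size helpers.
#include "aarch64/operands-aarch64.h"
using namespace vixl::aarch64;

void SVEMemOperandSketch() {
  // Scalar-plus-immediate, scaled by the vector length: [x0, #4, MUL VL]
  SVEMemOperand spi(x0, 4, SVE_MUL_VL);
  VIXL_ASSERT(spi.IsScalarPlusImmediate() && spi.IsMulVl());

  // Scalar-plus-scalar: [x0, x1, LSL #2]
  SVEMemOperand sps(x0, x1, LSL, 2);
  VIXL_ASSERT(sps.IsScalarPlusScalar() && sps.IsEquivalentToLSL(2));

  // Scalar-plus-vector (a gather form): [x0, z1.d, SXTW]
  SVEMemOperand spv(x0, z1.VnD(), SXTW);
  VIXL_ASSERT(spv.IsScalarPlusVector() && spv.IsScatterGather());

  // Vector-plus-immediate: [z0.s, #16]
  VIXL_ASSERT(SVEMemOperand(z0.VnS(), 16).IsVectorPlusImmediate());

  // [x0, xzr] is functionally the same as [x0].
  VIXL_ASSERT(SVEMemOperand(x0, xzr).IsEquivalentToScalar());
}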
+// Represent a signed or unsigned integer operand.
+//
+// This is designed to make instructions which naturally accept a _signed_
+// immediate easier to implement and use, when we also want users to be able to
+// specify raw-bits values (such as with hexadecimal constants). The advantage
+// of this class over a simple uint64_t (with implicit C++ sign-extension) is
+// that this class can strictly check the range of allowed values. With a simple
+// uint64_t, it is impossible to distinguish -1 from UINT64_MAX.
+//
+// For example, these instructions are equivalent:
+//
+// __ Insr(z0.VnB(), -1);
+// __ Insr(z0.VnB(), 0xff);
+//
+// ... as are these:
+//
+// __ Insr(z0.VnD(), -1);
+// __ Insr(z0.VnD(), 0xffffffffffffffff);
+//
+// ... but this is invalid:
+//
+// __ Insr(z0.VnB(), 0xffffffffffffffff); // Too big for B-sized lanes.
+class IntegerOperand {
+ public:
+#define VIXL_INT_TYPES(V) \
+ V(char) V(short) V(int) V(long) V(long long) // NOLINT(runtime/int)
+#define VIXL_DECL_INT_OVERLOADS(T) \
+ /* These are allowed to be implicit constructors because this is a */ \
+ /* wrapper class that doesn't normally perform any type conversion. */ \
+ IntegerOperand(signed T immediate) /* NOLINT(runtime/explicit) */ \
+ : raw_bits_(immediate), /* Allow implicit sign-extension. */ \
+ is_negative_(immediate < 0) {} \
+ IntegerOperand(unsigned T immediate) /* NOLINT(runtime/explicit) */ \
+ : raw_bits_(immediate), is_negative_(false) {}
+ VIXL_INT_TYPES(VIXL_DECL_INT_OVERLOADS)
+#undef VIXL_DECL_INT_OVERLOADS
+#undef VIXL_INT_TYPES
+
+ // TODO: `Operand` can currently only hold an int64_t, so some large, unsigned
+ // values will be misrepresented here.
+ explicit IntegerOperand(const Operand& operand)
+ : raw_bits_(operand.GetEquivalentImmediate()),
+ is_negative_(operand.GetEquivalentImmediate() < 0) {}
+
+ bool IsIntN(unsigned n) const {
+ return is_negative_ ? vixl::IsIntN(n, RawbitsToInt64(raw_bits_))
+ : vixl::IsIntN(n, raw_bits_);
+ }
+ bool IsUintN(unsigned n) const {
+ return !is_negative_ && vixl::IsUintN(n, raw_bits_);
+ }
+
+ bool IsUint8() const { return IsUintN(8); }
+ bool IsUint16() const { return IsUintN(16); }
+ bool IsUint32() const { return IsUintN(32); }
+ bool IsUint64() const { return IsUintN(64); }
+
+ bool IsInt8() const { return IsIntN(8); }
+ bool IsInt16() const { return IsIntN(16); }
+ bool IsInt32() const { return IsIntN(32); }
+ bool IsInt64() const { return IsIntN(64); }
+
+ bool FitsInBits(unsigned n) const {
+ return is_negative_ ? IsIntN(n) : IsUintN(n);
+ }
+ bool FitsInLane(const CPURegister& zd) const {
+ return FitsInBits(zd.GetLaneSizeInBits());
+ }
+ bool FitsInSignedLane(const CPURegister& zd) const {
+ return IsIntN(zd.GetLaneSizeInBits());
+ }
+ bool FitsInUnsignedLane(const CPURegister& zd) const {
+ return IsUintN(zd.GetLaneSizeInBits());
+ }
+
+ // Cast a value in the range [INT<n>_MIN, UINT<n>_MAX] to an unsigned integer
+ // in the range [0, UINT<n>_MAX] (using two's complement mapping).
+ uint64_t AsUintN(unsigned n) const {
+ VIXL_ASSERT(FitsInBits(n));
+ return raw_bits_ & GetUintMask(n);
+ }
+
+ uint8_t AsUint8() const { return static_cast<uint8_t>(AsUintN(8)); }
+ uint16_t AsUint16() const { return static_cast<uint16_t>(AsUintN(16)); }
+ uint32_t AsUint32() const { return static_cast<uint32_t>(AsUintN(32)); }
+ uint64_t AsUint64() const { return AsUintN(64); }
+
+ // Cast a value in the range [INT<n>_MIN, UINT<n>_MAX] to a signed integer in
+ // the range [INT<n>_MIN, INT<n>_MAX] (using two's complement mapping).
+ int64_t AsIntN(unsigned n) const {
+ VIXL_ASSERT(FitsInBits(n));
+ return ExtractSignedBitfield64(n - 1, 0, raw_bits_);
+ }
+
+ int8_t AsInt8() const { return static_cast<int8_t>(AsIntN(8)); }
+ int16_t AsInt16() const { return static_cast<int16_t>(AsIntN(16)); }
+ int32_t AsInt32() const { return static_cast<int32_t>(AsIntN(32)); }
+ int64_t AsInt64() const { return AsIntN(64); }
+
+ // Several instructions encode a signed int<N>_t, which is then (optionally)
+ // left-shifted and sign-extended to a Z register lane with a size which may
+ // be larger than N. This helper tries to find an int<N>_t such that the
+ // IntegerOperand's arithmetic value is reproduced in each lane.
+ //
+ // This is the mechanism that allows `Insr(z0.VnB(), 0xff)` to be treated as
+ // `Insr(z0.VnB(), -1)`.
+ template <unsigned N, unsigned kShift, typename T>
+ bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd, T* imm) const {
+ VIXL_STATIC_ASSERT(std::numeric_limits<T>::digits > N);
+ VIXL_ASSERT(FitsInLane(zd));
+ if ((raw_bits_ & GetUintMask(kShift)) != 0) return false;
+
+ // Reverse the specified left-shift.
+ IntegerOperand unshifted(*this);
+ unshifted.ArithmeticShiftRight(kShift);
+
+ if (unshifted.IsIntN(N)) {
+ // This is trivial, since sign-extension produces the same arithmetic
+ // value irrespective of the destination size.
+ *imm = static_cast<T>(unshifted.AsIntN(N));
+ return true;
+ }
+
+ // Otherwise, we might be able to use the sign-extension to produce the
+ // desired bit pattern. We can only do this for values in the range
+ // [INT<N>_MAX + 1, UINT<N>_MAX], where the highest set bit is the sign bit.
+ //
+ // The lane size has to be adjusted to compensate for `kShift`, since the
+ // high bits will be dropped when the encoded value is left-shifted.
+ if (unshifted.IsUintN(zd.GetLaneSizeInBits() - kShift)) {
+ int64_t encoded = unshifted.AsIntN(zd.GetLaneSizeInBits() - kShift);
+ if (vixl::IsIntN(N, encoded)) {
+ *imm = static_cast<T>(encoded);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // As above, but `kShift` is written to the `*shift` parameter on success, so
+ // that it is easy to chain calls like this:
+ //
+ // if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
+ // imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
+ // insn(zd, imm8, shift)
+ // }
+ template <unsigned N, unsigned kShift, typename T, typename S>
+ bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd,
+ T* imm,
+ S* shift) const {
+ if (TryEncodeAsShiftedIntNForLane<N, kShift>(zd, imm)) {
+ *shift = kShift;
+ return true;
+ }
+ return false;
+ }
+
+ // As above, but assume that `kShift` is 0.
+ template <unsigned N, typename T>
+ bool TryEncodeAsIntNForLane(const CPURegister& zd, T* imm) const {
+ return TryEncodeAsShiftedIntNForLane<N, 0>(zd, imm);
+ }
+
+  // As above, but for unsigned fields. This is usually a simple operation, but
+ // is provided for symmetry.
+ template <unsigned N, unsigned kShift, typename T>
+ bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd, T* imm) const {
+ VIXL_STATIC_ASSERT(std::numeric_limits<T>::digits > N);
+ VIXL_ASSERT(FitsInLane(zd));
+
+ // TODO: Should we convert -1 to 0xff here?
+ if (is_negative_) return false;
+ USE(zd);
+
+ if ((raw_bits_ & GetUintMask(kShift)) != 0) return false;
+
+ if (vixl::IsUintN(N, raw_bits_ >> kShift)) {
+ *imm = static_cast<T>(raw_bits_ >> kShift);
+ return true;
+ }
+ return false;
+ }
+
+ template <unsigned N, unsigned kShift, typename T, typename S>
+ bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd,
+ T* imm,
+ S* shift) const {
+ if (TryEncodeAsShiftedUintNForLane<N, kShift>(zd, imm)) {
+ *shift = kShift;
+ return true;
+ }
+ return false;
+ }
+
+ bool IsZero() const { return raw_bits_ == 0; }
+ bool IsNegative() const { return is_negative_; }
+ bool IsPositiveOrZero() const { return !is_negative_; }
+
+ uint64_t GetMagnitude() const {
+ return is_negative_ ? -raw_bits_ : raw_bits_;
+ }
+
+ private:
+ // Shift the arithmetic value right, with sign extension if is_negative_.
+ void ArithmeticShiftRight(int shift) {
+ VIXL_ASSERT((shift >= 0) && (shift < 64));
+ if (shift == 0) return;
+ if (is_negative_) {
+ raw_bits_ = ExtractSignedBitfield64(63, shift, raw_bits_);
+ } else {
+ raw_bits_ >>= shift;
+ }
+ }
+
+ uint64_t raw_bits_;
+ bool is_negative_;
+};
+
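// Illustrative sketch (hypothetical usage, not part of this patch): how
// IntegerOperand reconciles signed and raw-bits immediates, following the
// Insr() example in the comment above and assuming the usual z register
// constants.
#include "aarch64/operands-aarch64.h"
using namespace vixl::aarch64;

void IntegerOperandSketch() {
  IntegerOperand minus_one(-1);
  IntegerOperand all_ones(0xff);

  // -1 is a valid signed byte but not a valid unsigned one; 0xff is the
  // opposite.
  VIXL_ASSERT(minus_one.IsInt8() && !minus_one.IsUint8());
  VIXL_ASSERT(all_ones.IsUint8() && !all_ones.IsInt8());

  // Under two's complement they produce the same 8-bit encoding, which is why
  // `Insr(z0.VnB(), -1)` and `Insr(z0.VnB(), 0xff)` are equivalent.
  VIXL_ASSERT(minus_one.AsUintN(8) == all_ones.AsUintN(8));

  // For B-sized lanes, 0xff can be encoded as the signed 8-bit value -1.
  int imm8 = 0;
  VIXL_ASSERT(all_ones.TryEncodeAsIntNForLane<8>(z0.VnB(), &imm8));
  VIXL_ASSERT(imm8 == -1);
}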
// This is an abstraction that can represent a register or memory location. The
// `MacroAssembler` provides helpers to move data between generic operands.
class GenericOperand {
diff --git a/src/aarch64/registers-aarch64.cc b/src/aarch64/registers-aarch64.cc
new file mode 100644
index 00000000..735f43c7
--- /dev/null
+++ b/src/aarch64/registers-aarch64.cc
@@ -0,0 +1,321 @@
+// Copyright 2019, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <sstream>
+#include <string>
+
+#include "registers-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+std::string CPURegister::GetArchitecturalName() const {
+ std::ostringstream name;
+ if (IsZRegister()) {
+ name << 'z' << GetCode();
+ if (HasLaneSize()) {
+ name << '.' << GetLaneSizeSymbol();
+ }
+ } else if (IsPRegister()) {
+ name << 'p' << GetCode();
+ if (HasLaneSize()) {
+ name << '.' << GetLaneSizeSymbol();
+ }
+ switch (qualifiers_) {
+ case kNoQualifiers:
+ break;
+ case kMerging:
+ name << "/m";
+ break;
+ case kZeroing:
+ name << "/z";
+ break;
+ }
+ } else {
+ VIXL_UNIMPLEMENTED();
+ }
+ return name.str();
+}
+
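// Illustrative sketch (hypothetical usage, not part of this patch): names
// produced by GetArchitecturalName(), assuming the usual z/p register
// constants and the PRegister Merging()/Zeroing() qualifiers.
#include "aarch64/registers-aarch64.h"
using namespace vixl::aarch64;

void ArchitecturalNameSketch() {
  // Predicates print their governing qualifier, if any.
  VIXL_ASSERT(p1.Merging().GetArchitecturalName() == "p1/m");
  VIXL_ASSERT(p3.Zeroing().GetArchitecturalName() == "p3/z");
  // Z registers print an optional lane-size suffix, e.g. "z3.s" for z3.VnS().
  VIXL_ASSERT(z3.VnS().GetArchitecturalName().substr(0, 2) == "z3");
}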
+unsigned CPURegister::GetMaxCodeFor(CPURegister::RegisterBank bank) {
+ switch (bank) {
+ case kNoRegisterBank:
+ return 0;
+ case kRRegisterBank:
+ return Register::GetMaxCode();
+ case kVRegisterBank:
+#ifdef VIXL_HAS_CONSTEXPR
+ VIXL_STATIC_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
+#else
+ VIXL_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
+#endif
+ return VRegister::GetMaxCode();
+ case kPRegisterBank:
+ return PRegister::GetMaxCode();
+ }
+ VIXL_UNREACHABLE();
+ return 0;
+}
+
+bool CPURegister::IsValidRegister() const {
+ return ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)) &&
+ (bank_ == kRRegisterBank) &&
+ ((size_ == kEncodedWRegSize) || (size_ == kEncodedXRegSize)) &&
+ (qualifiers_ == kNoQualifiers) && (lane_size_ == size_);
+}
+
+bool CPURegister::IsValidVRegister() const {
+ VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+ return (code_ < kNumberOfVRegisters) && (bank_ == kVRegisterBank) &&
+ ((size_ >= kEncodedBRegSize) && (size_ <= kEncodedQRegSize)) &&
+ (qualifiers_ == kNoQualifiers) &&
+ (lane_size_ != kEncodedUnknownSize) && (lane_size_ <= size_);
+}
+
+bool CPURegister::IsValidFPRegister() const {
+ return IsValidVRegister() && IsFPRegister();
+}
+
+bool CPURegister::IsValidZRegister() const {
+ VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+ // Z registers are valid with or without a lane size, so we don't need to
+ // check lane_size_.
+ return (code_ < kNumberOfZRegisters) && (bank_ == kVRegisterBank) &&
+ (size_ == kEncodedUnknownSize) && (qualifiers_ == kNoQualifiers);
+}
+
+bool CPURegister::IsValidPRegister() const {
+ VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+ // P registers are valid with or without a lane size, so we don't need to
+ // check lane_size_.
+ return (code_ < kNumberOfPRegisters) && (bank_ == kPRegisterBank) &&
+ (size_ == kEncodedUnknownSize) &&
+ ((qualifiers_ == kNoQualifiers) || (qualifiers_ == kMerging) ||
+ (qualifiers_ == kZeroing));
+}
+
+bool CPURegister::IsValid() const {
+ return IsValidRegister() || IsValidVRegister() || IsValidZRegister() ||
+ IsValidPRegister();
+}
+
+// Most coercions simply invoke the necessary constructor.
+#define VIXL_CPUREG_COERCION_LIST(U) \
+ U(Register, W, R) \
+ U(Register, X, R) \
+ U(VRegister, B, V) \
+ U(VRegister, H, V) \
+ U(VRegister, S, V) \
+ U(VRegister, D, V) \
+ U(VRegister, Q, V) \
+ U(VRegister, V, V) \
+ U(ZRegister, Z, V) \
+ U(PRegister, P, P)
+#define VIXL_DEFINE_CPUREG_COERCION(RET_TYPE, CTOR_TYPE, BANK) \
+ RET_TYPE CPURegister::CTOR_TYPE() const { \
+ VIXL_ASSERT(GetBank() == k##BANK##RegisterBank); \
+ return CTOR_TYPE##Register(GetCode()); \
+ }
+VIXL_CPUREG_COERCION_LIST(VIXL_DEFINE_CPUREG_COERCION)
+#undef VIXL_CPUREG_COERCION_LIST
+#undef VIXL_DEFINE_CPUREG_COERCION
+
+// NEON lane-format coercions always return VRegisters.
+#define VIXL_CPUREG_NEON_COERCION_LIST(V) \
+ V(8, B) \
+ V(16, B) \
+ V(2, H) \
+ V(4, H) \
+ V(8, H) \
+ V(2, S) \
+ V(4, S) \
+ V(1, D) \
+ V(2, D)
+#define VIXL_DEFINE_CPUREG_NEON_COERCION(LANES, LANE_TYPE) \
+ VRegister VRegister::V##LANES##LANE_TYPE() const { \
+ VIXL_ASSERT(IsVRegister()); \
+ return VRegister(GetCode(), LANES * k##LANE_TYPE##RegSize, LANES); \
+ }
+VIXL_CPUREG_NEON_COERCION_LIST(VIXL_DEFINE_CPUREG_NEON_COERCION)
+#undef VIXL_CPUREG_NEON_COERCION_LIST
+#undef VIXL_DEFINE_CPUREG_NEON_COERCION
+
+// Semantic type coercion for sdot and udot.
+// TODO: Use the qualifiers_ field to distinguish this from ::S().
+VRegister VRegister::S4B() const {
+ VIXL_ASSERT(IsVRegister());
+ return SRegister(GetCode());
+}
+
+bool AreAliased(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4,
+ const CPURegister& reg5,
+ const CPURegister& reg6,
+ const CPURegister& reg7,
+ const CPURegister& reg8) {
+ int number_of_valid_regs = 0;
+ int number_of_valid_vregs = 0;
+ int number_of_valid_pregs = 0;
+
+ RegList unique_regs = 0;
+ RegList unique_vregs = 0;
+ RegList unique_pregs = 0;
+
+ const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};
+
+ for (size_t i = 0; i < ArrayLength(regs); i++) {
+ switch (regs[i].GetBank()) {
+ case CPURegister::kRRegisterBank:
+ number_of_valid_regs++;
+ unique_regs |= regs[i].GetBit();
+ break;
+ case CPURegister::kVRegisterBank:
+ number_of_valid_vregs++;
+ unique_vregs |= regs[i].GetBit();
+ break;
+ case CPURegister::kPRegisterBank:
+ number_of_valid_pregs++;
+ unique_pregs |= regs[i].GetBit();
+ break;
+ case CPURegister::kNoRegisterBank:
+ VIXL_ASSERT(regs[i].IsNone());
+ break;
+ }
+ }
+
+ int number_of_unique_regs = CountSetBits(unique_regs);
+ int number_of_unique_vregs = CountSetBits(unique_vregs);
+ int number_of_unique_pregs = CountSetBits(unique_pregs);
+
+ VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
+ VIXL_ASSERT(number_of_valid_vregs >= number_of_unique_vregs);
+ VIXL_ASSERT(number_of_valid_pregs >= number_of_unique_pregs);
+
+ return (number_of_valid_regs != number_of_unique_regs) ||
+ (number_of_valid_vregs != number_of_unique_vregs) ||
+ (number_of_valid_pregs != number_of_unique_pregs);
+}
+
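// Illustrative sketch (hypothetical usage, not part of this patch): aliasing
// is tracked per register bank, so W/X, NEON/SVE vector, and predicate
// registers are only compared against registers in the same bank.
#include "aarch64/registers-aarch64.h"
using namespace vixl::aarch64;

void AreAliasedSketch() {
  VIXL_ASSERT(AreAliased(w0, x0));        // Same code in the R bank.
  VIXL_ASSERT(AreAliased(q1, z1.VnB()));  // NEON and SVE share the V bank.
  VIXL_ASSERT(!AreAliased(x2, z2, p2));   // Different banks never alias.
}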
+bool AreSameSizeAndType(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4,
+ const CPURegister& reg5,
+ const CPURegister& reg6,
+ const CPURegister& reg7,
+ const CPURegister& reg8) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool match = true;
+ match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1);
+ match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1);
+ match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1);
+ match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1);
+ match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1);
+ match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1);
+ match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1);
+ return match;
+}
+
+bool AreEven(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4,
+ const CPURegister& reg5,
+ const CPURegister& reg6,
+ const CPURegister& reg7,
+ const CPURegister& reg8) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool even = (reg1.GetCode() % 2) == 0;
+ even &= !reg2.IsValid() || ((reg2.GetCode() % 2) == 0);
+ even &= !reg3.IsValid() || ((reg3.GetCode() % 2) == 0);
+ even &= !reg4.IsValid() || ((reg4.GetCode() % 2) == 0);
+ even &= !reg5.IsValid() || ((reg5.GetCode() % 2) == 0);
+ even &= !reg6.IsValid() || ((reg6.GetCode() % 2) == 0);
+ even &= !reg7.IsValid() || ((reg7.GetCode() % 2) == 0);
+ even &= !reg8.IsValid() || ((reg8.GetCode() % 2) == 0);
+ return even;
+}
+
+bool AreConsecutive(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4) {
+ VIXL_ASSERT(reg1.IsValid());
+
+ if (!reg2.IsValid()) {
+ return true;
+ } else if (reg2.GetCode() !=
+ ((reg1.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+ return false;
+ }
+
+ if (!reg3.IsValid()) {
+ return true;
+ } else if (reg3.GetCode() !=
+ ((reg2.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+ return false;
+ }
+
+ if (!reg4.IsValid()) {
+ return true;
+ } else if (reg4.GetCode() !=
+ ((reg3.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+ return false;
+ }
+
+ return true;
+}
+
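// Illustrative sketch (hypothetical usage, not part of this patch): codes are
// compared modulo the bank size, so consecutive sequences may wrap around.
#include "aarch64/registers-aarch64.h"
using namespace vixl::aarch64;

void AreConsecutiveSketch() {
  VIXL_ASSERT(AreConsecutive(x3, x4, x5));
  VIXL_ASSERT(!AreConsecutive(x3, x5));
  VIXL_ASSERT(AreConsecutive(v31, v0, v1));  // v31 wraps around to v0.
}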
+bool AreSameFormat(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool match = true;
+ match &= !reg2.IsValid() || reg2.IsSameFormat(reg1);
+ match &= !reg3.IsValid() || reg3.IsSameFormat(reg1);
+ match &= !reg4.IsValid() || reg4.IsSameFormat(reg1);
+ return match;
+}
+
+bool AreSameLaneSize(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3,
+ const CPURegister& reg4) {
+ VIXL_ASSERT(reg1.IsValid());
+ bool match = true;
+ match &=
+ !reg2.IsValid() || (reg2.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+ match &=
+ !reg3.IsValid() || (reg3.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+ match &=
+ !reg4.IsValid() || (reg4.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+ return match;
+}
+}  // namespace aarch64
+}  // namespace vixl
diff --git a/src/aarch64/registers-aarch64.h b/src/aarch64/registers-aarch64.h
new file mode 100644
index 00000000..911974a8
--- /dev/null
+++ b/src/aarch64/registers-aarch64.h
@@ -0,0 +1,900 @@
+// Copyright 2019, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_AARCH64_REGISTERS_AARCH64_H_
+#define VIXL_AARCH64_REGISTERS_AARCH64_H_
+
+#include <string>
+
+#include "instructions-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+// An integer type capable of representing a homogeneous, non-overlapping set of
+// registers as a bitmask of their codes.
+typedef uint64_t RegList;
+static const int kRegListSizeInBits = sizeof(RegList) * 8;
+
+class Register;
+class WRegister;
+class XRegister;
+
+class VRegister;
+class BRegister;
+class HRegister;
+class SRegister;
+class DRegister;
+class QRegister;
+
+class ZRegister;
+
+class PRegister;
+class PRegisterWithLaneSize;
+class PRegisterM;
+class PRegisterZ;
+
+// A container for any single register supported by the processor. Selected
+// qualifications are also supported. Basic registers can be constructed
+// directly as CPURegister objects. Other variants should be constructed as one
+// of the derived classes.
+//
+// CPURegister aims to support any getter that would also be available to more
+// specialised register types. However, using the equivalent functions on the
+// specialised register types can avoid run-time checks, and should therefore be
+// preferred where run-time polymorphism isn't required.
+//
+// Type-specific modifiers are typically implemented only on the derived classes.
+//
+// The encoding is such that CPURegister objects are cheap to pass by value.
+class CPURegister {
+ public:
+ enum RegisterBank : uint8_t {
+ kNoRegisterBank = 0,
+ kRRegisterBank,
+ kVRegisterBank,
+ kPRegisterBank
+ };
+ enum RegisterType {
+ kNoRegister,
+ kRegister,
+ kVRegister,
+ kZRegister,
+ kPRegister
+ };
+
+ static const unsigned kUnknownSize = 0;
+
+ VIXL_CONSTEXPR CPURegister()
+ : code_(0),
+ bank_(kNoRegisterBank),
+ size_(kEncodedUnknownSize),
+ qualifiers_(kNoQualifiers),
+ lane_size_(kEncodedUnknownSize) {}
+
+ CPURegister(int code, int size_in_bits, RegisterType type)
+ : code_(code),
+ bank_(GetBankFor(type)),
+ size_(EncodeSizeInBits(size_in_bits)),
+ qualifiers_(kNoQualifiers),
+ lane_size_(EncodeSizeInBits(size_in_bits)) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ // Basic accessors.
+
+ // TODO: Make this return 'int'.
+ unsigned GetCode() const { return code_; }
+
+ RegisterBank GetBank() const { return bank_; }
+
+ // For scalar registers, the lane size matches the register size, and is
+ // always known.
+ bool HasSize() const { return size_ != kEncodedUnknownSize; }
+ bool HasLaneSize() const { return lane_size_ != kEncodedUnknownSize; }
+
+ RegList GetBit() const {
+ if (IsNone()) return 0;
+ VIXL_ASSERT(code_ < kRegListSizeInBits);
+ return static_cast<RegList>(1) << code_;
+ }
+
+ // Return the architectural name for this register.
+ // TODO: This is temporary. Ultimately, we should move the
+ // Simulator::*RegNameForCode helpers out of the simulator, and provide an
+ // independent way to obtain the name of a register.
+ std::string GetArchitecturalName() const;
+
+ // Return the highest valid register code for this type, to allow generic
+ // loops to be written. This excludes kSPRegInternalCode, since it is not
+ // contiguous, and sp usually requires special handling anyway.
+ unsigned GetMaxCode() const { return GetMaxCodeFor(GetBank()); }
+
+ // Registers without a known size report kUnknownSize.
+ int GetSizeInBits() const { return DecodeSizeInBits(size_); }
+ int GetSizeInBytes() const { return DecodeSizeInBytes(size_); }
+ // TODO: Make these return 'int'.
+ unsigned GetLaneSizeInBits() const { return DecodeSizeInBits(lane_size_); }
+ unsigned GetLaneSizeInBytes() const { return DecodeSizeInBytes(lane_size_); }
+ unsigned GetLaneSizeInBytesLog2() const {
+ VIXL_ASSERT(HasLaneSize());
+ return DecodeSizeInBytesLog2(lane_size_);
+ }
+
+ int GetLanes() const {
+ if (HasSize() && HasLaneSize()) {
+ // Take advantage of the size encoding to calculate this efficiently.
+ VIXL_STATIC_ASSERT(kEncodedHRegSize == (kEncodedBRegSize + 1));
+ VIXL_STATIC_ASSERT(kEncodedSRegSize == (kEncodedHRegSize + 1));
+ VIXL_STATIC_ASSERT(kEncodedDRegSize == (kEncodedSRegSize + 1));
+ VIXL_STATIC_ASSERT(kEncodedQRegSize == (kEncodedDRegSize + 1));
+ int log2_delta = static_cast<int>(size_) - static_cast<int>(lane_size_);
+ VIXL_ASSERT(log2_delta >= 0);
+ return 1 << log2_delta;
+ }
+ return kUnknownSize;
+ }
+
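// Illustrative sketch (hypothetical usage, not part of this patch): the lane
// count is derived from the gap between the encoded register and lane sizes.
#include "aarch64/registers-aarch64.h"
using namespace vixl::aarch64;

void LaneCountSketch() {
  VIXL_ASSERT(v0.V4S().GetLanes() == 4);  // 128-bit register, 32-bit lanes.
  VIXL_ASSERT(d1.GetLanes() == 1);        // Scalar: lane size equals size.
  VIXL_ASSERT(z0.GetLanes() == 0);        // kUnknownSize: Z size is not known.
}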
+ bool Is8Bits() const { return size_ == kEncodedBRegSize; }
+ bool Is16Bits() const { return size_ == kEncodedHRegSize; }
+ bool Is32Bits() const { return size_ == kEncodedSRegSize; }
+ bool Is64Bits() const { return size_ == kEncodedDRegSize; }
+ bool Is128Bits() const { return size_ == kEncodedQRegSize; }
+
+ bool IsLaneSizeB() const { return lane_size_ == kEncodedBRegSize; }
+ bool IsLaneSizeH() const { return lane_size_ == kEncodedHRegSize; }
+ bool IsLaneSizeS() const { return lane_size_ == kEncodedSRegSize; }
+ bool IsLaneSizeD() const { return lane_size_ == kEncodedDRegSize; }
+ bool IsLaneSizeQ() const { return lane_size_ == kEncodedQRegSize; }
+
+ // If Is<Foo>Register(), then it is valid to convert the CPURegister to some
+ // <Foo>Register<Bar> type.
+ //
+ // If... ... then it is safe to construct ...
+ // r.IsRegister() -> Register(r)
+ // r.IsVRegister() -> VRegister(r)
+ // r.IsZRegister() -> ZRegister(r)
+ // r.IsPRegister() -> PRegister(r)
+ //
+ // r.IsPRegister() && HasLaneSize() -> PRegisterWithLaneSize(r)
+ // r.IsPRegister() && IsMerging() -> PRegisterM(r)
+ // r.IsPRegister() && IsZeroing() -> PRegisterZ(r)
+ bool IsRegister() const { return GetType() == kRegister; }
+ bool IsVRegister() const { return GetType() == kVRegister; }
+ bool IsZRegister() const { return GetType() == kZRegister; }
+ bool IsPRegister() const { return GetType() == kPRegister; }
+
+ bool IsNone() const { return GetType() == kNoRegister; }
+
+ // `GetType() == kNoRegister` implies IsNone(), and vice-versa.
+ // `GetType() == k<Foo>Register` implies Is<Foo>Register(), and vice-versa.
+ RegisterType GetType() const {
+ switch (bank_) {
+ case kNoRegisterBank:
+ return kNoRegister;
+ case kRRegisterBank:
+ return kRegister;
+ case kVRegisterBank:
+ return HasSize() ? kVRegister : kZRegister;
+ case kPRegisterBank:
+ return kPRegister;
+ }
+ VIXL_UNREACHABLE();
+ return kNoRegister;
+ }
+
+ // IsFPRegister() is true for scalar FP types (and therefore implies
+ // IsVRegister()). There is no corresponding FPRegister type.
+ bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); }
+
+ // TODO: These are stricter forms of the helpers above. We should make the
+ // basic helpers strict, and remove these.
+ bool IsValidRegister() const;
+ bool IsValidVRegister() const;
+ bool IsValidFPRegister() const;
+ bool IsValidZRegister() const;
+ bool IsValidPRegister() const;
+
+ bool IsValid() const;
+ bool IsValidOrNone() const { return IsNone() || IsValid(); }
+
+ bool IsVector() const { return HasLaneSize() && (size_ != lane_size_); }
+ bool IsScalar() const { return HasLaneSize() && (size_ == lane_size_); }
+
+ bool IsSameType(const CPURegister& other) const {
+ return GetType() == other.GetType();
+ }
+
+ bool IsSameBank(const CPURegister& other) const {
+ return GetBank() == other.GetBank();
+ }
+
+ // Two registers with unknown size are considered to have the same size if
+ // they also have the same type. For example, all Z registers have the same
+ // size, even though we don't know what that is.
+ bool IsSameSizeAndType(const CPURegister& other) const {
+ return IsSameType(other) && (size_ == other.size_);
+ }
+
+ bool IsSameFormat(const CPURegister& other) const {
+ return IsSameSizeAndType(other) && (lane_size_ == other.lane_size_);
+ }
+
+ // Note that NoReg aliases itself, so that 'Is' implies 'Aliases'.
+ bool Aliases(const CPURegister& other) const {
+ return IsSameBank(other) && (code_ == other.code_);
+ }
+
+ bool Is(const CPURegister& other) const {
+ if (IsRegister() || IsVRegister()) {
+ // For core (W, X) and FP/NEON registers, we only consider the code, size
+ // and type. This is legacy behaviour.
+ // TODO: We should probably check every field for all registers.
+ return Aliases(other) && (size_ == other.size_);
+ } else {
+ // For Z and P registers, we require all fields to match exactly.
+ VIXL_ASSERT(IsNone() || IsZRegister() || IsPRegister());
+ return (code_ == other.code_) && (bank_ == other.bank_) &&
+ (size_ == other.size_) && (qualifiers_ == other.qualifiers_) &&
+ (lane_size_ == other.lane_size_);
+ }
+ }
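+
+  // For example, w0 and x0 share a bank and a code, so w0.Aliases(x0) is
+  // true, but w0.Is(x0) is false because their sizes differ.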
+
+ // Conversions to specific register types. The result is a register that
+ // aliases the original CPURegister. That is, the original register bank
+ // (`GetBank()`) is checked and the code (`GetCode()`) preserved, but all
+ // other properties are ignored.
+ //
+ // Typical usage:
+ //
+ // if (reg.GetBank() == kVRegisterBank) {
+ // DRegister d = reg.D();
+ // ...
+ // }
+ //
+ // These could all return types with compile-time guarantees (like XRegister),
+ // but this breaks backwards-compatibility quite severely, particularly with
+ // code like `cond ? reg.W() : reg.X()`, which would have indeterminate type.
+
+ // Core registers, like "w0".
+ Register W() const;
+ Register X() const;
+ // FP/NEON registers, like "b0".
+ VRegister B() const;
+ VRegister H() const;
+ VRegister S() const;
+ VRegister D() const;
+ VRegister Q() const;
+ VRegister V() const;
+ // SVE registers, like "z0".
+ ZRegister Z() const;
+ PRegister P() const;
+
+ // Utilities for kRegister types.
+
+ bool IsZero() const { return IsRegister() && (code_ == kZeroRegCode); }
+ bool IsSP() const { return IsRegister() && (code_ == kSPRegInternalCode); }
+ bool IsW() const { return IsRegister() && Is32Bits(); }
+ bool IsX() const { return IsRegister() && Is64Bits(); }
+
+ // Utilities for FP/NEON kVRegister types.
+
+ // These helpers ensure that the size and type of the register are as
+ // described. They do not consider the number of lanes that make up a vector.
+  // So, for example, Is8B() implies IsD(), and Is1D() implies IsD(), but
+  // IsD() does not imply Is1D() or Is8B().
+  // Check the number of lanes, i.e. the format of the vector, using methods such
+ // as Is8B(), Is1D(), etc.
+ bool IsB() const { return IsVRegister() && Is8Bits(); }
+ bool IsH() const { return IsVRegister() && Is16Bits(); }
+ bool IsS() const { return IsVRegister() && Is32Bits(); }
+ bool IsD() const { return IsVRegister() && Is64Bits(); }
+ bool IsQ() const { return IsVRegister() && Is128Bits(); }
+
+ // As above, but also check that the register has exactly one lane. For
+ // example, reg.Is1D() implies DRegister(reg).IsValid(), but reg.IsD() does
+ // not.
+ bool Is1B() const { return IsB() && IsScalar(); }
+ bool Is1H() const { return IsH() && IsScalar(); }
+ bool Is1S() const { return IsS() && IsScalar(); }
+ bool Is1D() const { return IsD() && IsScalar(); }
+ bool Is1Q() const { return IsQ() && IsScalar(); }
+
+ // Check the specific NEON format.
+ bool Is8B() const { return IsD() && IsLaneSizeB(); }
+ bool Is16B() const { return IsQ() && IsLaneSizeB(); }
+ bool Is2H() const { return IsS() && IsLaneSizeH(); }
+ bool Is4H() const { return IsD() && IsLaneSizeH(); }
+ bool Is8H() const { return IsQ() && IsLaneSizeH(); }
+ bool Is2S() const { return IsD() && IsLaneSizeS(); }
+ bool Is4S() const { return IsQ() && IsLaneSizeS(); }
+ bool Is2D() const { return IsQ() && IsLaneSizeD(); }
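+
+  // For example, a register constructed as VRegister(0, kFormat8B) (using the
+  // kFormat8B VectorFormat defined elsewhere in VIXL) is a 64-bit register
+  // with eight B-sized lanes: Is8B() and IsD() are true for it, but Is1D() is
+  // not, because it is not a scalar.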
+
+ // A semantic alias for sdot and udot (indexed and by element) instructions.
+  // The current CPURegister implementation cannot tell this apart from
+  // Is1S(), but it might do so later.
+ // TODO: Do this with the qualifiers_ field.
+ bool Is1S4B() const { return Is1S(); }
+
+ // Utilities for SVE registers.
+
+ bool IsUnqualified() const { return qualifiers_ == kNoQualifiers; }
+ bool IsMerging() const { return IsPRegister() && (qualifiers_ == kMerging); }
+ bool IsZeroing() const { return IsPRegister() && (qualifiers_ == kZeroing); }
+
+ // SVE types have unknown sizes, but within known bounds.
+
+ int GetMaxSizeInBytes() const {
+ switch (GetType()) {
+ case kZRegister:
+ return kZRegMaxSizeInBytes;
+ case kPRegister:
+ return kPRegMaxSizeInBytes;
+ default:
+ VIXL_ASSERT(HasSize());
+        return GetSizeInBytes();
+ }
+ }
+
+ int GetMinSizeInBytes() const {
+ switch (GetType()) {
+ case kZRegister:
+ return kZRegMinSizeInBytes;
+ case kPRegister:
+ return kPRegMinSizeInBytes;
+ default:
+ VIXL_ASSERT(HasSize());
+        return GetSizeInBytes();
+ }
+ }
+
+ int GetMaxSizeInBits() const { return GetMaxSizeInBytes() * kBitsPerByte; }
+ int GetMinSizeInBits() const { return GetMinSizeInBytes() * kBitsPerByte; }
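+
+  // For example, assuming the architectural SVE bounds of 128 to 2048 bits,
+  // a Z register reports GetMinSizeInBits() == 128 and GetMaxSizeInBits() ==
+  // 2048, and a P register (one predicate bit per Z-register byte) reports
+  // one eighth of those values.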
+
+ static RegisterBank GetBankFor(RegisterType type) {
+ switch (type) {
+ case kNoRegister:
+ return kNoRegisterBank;
+ case kRegister:
+ return kRRegisterBank;
+ case kVRegister:
+ case kZRegister:
+ return kVRegisterBank;
+ case kPRegister:
+ return kPRegisterBank;
+ }
+ VIXL_UNREACHABLE();
+ return kNoRegisterBank;
+ }
+
+ static unsigned GetMaxCodeFor(CPURegister::RegisterType type) {
+ return GetMaxCodeFor(GetBankFor(type));
+ }
+
+ protected:
+ enum EncodedSize : uint8_t {
+ // Ensure that kUnknownSize (and therefore kNoRegister) is encoded as zero.
+ kEncodedUnknownSize = 0,
+
+ // The implementation assumes that the remaining sizes are encoded as
+ // `log2(size) + c`, so the following names must remain in sequence.
+ kEncodedBRegSize,
+ kEncodedHRegSize,
+ kEncodedSRegSize,
+ kEncodedDRegSize,
+ kEncodedQRegSize,
+
+ kEncodedWRegSize = kEncodedSRegSize,
+ kEncodedXRegSize = kEncodedDRegSize
+ };
+ VIXL_STATIC_ASSERT(kSRegSize == kWRegSize);
+ VIXL_STATIC_ASSERT(kDRegSize == kXRegSize);
+
+ char GetLaneSizeSymbol() const {
+ switch (lane_size_) {
+ case kEncodedBRegSize:
+ return 'B';
+ case kEncodedHRegSize:
+ return 'H';
+ case kEncodedSRegSize:
+ return 'S';
+ case kEncodedDRegSize:
+ return 'D';
+ case kEncodedQRegSize:
+ return 'Q';
+ case kEncodedUnknownSize:
+ break;
+ }
+ VIXL_UNREACHABLE();
+ return '?';
+ }
+
+ static EncodedSize EncodeSizeInBits(int size_in_bits) {
+ switch (size_in_bits) {
+ case kUnknownSize:
+ return kEncodedUnknownSize;
+ case kBRegSize:
+ return kEncodedBRegSize;
+ case kHRegSize:
+ return kEncodedHRegSize;
+ case kSRegSize:
+ return kEncodedSRegSize;
+ case kDRegSize:
+ return kEncodedDRegSize;
+ case kQRegSize:
+ return kEncodedQRegSize;
+ }
+ VIXL_UNREACHABLE();
+ return kEncodedUnknownSize;
+ }
+
+ static int DecodeSizeInBytesLog2(EncodedSize encoded_size) {
+ switch (encoded_size) {
+ case kEncodedUnknownSize:
+ // Log2 of B-sized lane in bytes is 0, so we can't just return 0 here.
+ VIXL_UNREACHABLE();
+ return -1;
+ case kEncodedBRegSize:
+ return kBRegSizeInBytesLog2;
+ case kEncodedHRegSize:
+ return kHRegSizeInBytesLog2;
+ case kEncodedSRegSize:
+ return kSRegSizeInBytesLog2;
+ case kEncodedDRegSize:
+ return kDRegSizeInBytesLog2;
+ case kEncodedQRegSize:
+ return kQRegSizeInBytesLog2;
+ }
+ VIXL_UNREACHABLE();
+ return kUnknownSize;
+ }
+
+ static int DecodeSizeInBytes(EncodedSize encoded_size) {
+ if (encoded_size == kEncodedUnknownSize) {
+ return kUnknownSize;
+ }
+ return 1 << DecodeSizeInBytesLog2(encoded_size);
+ }
+
+ static int DecodeSizeInBits(EncodedSize encoded_size) {
+ VIXL_STATIC_ASSERT(kUnknownSize == 0);
+ return DecodeSizeInBytes(encoded_size) * kBitsPerByte;
+ }
+
+ static unsigned GetMaxCodeFor(CPURegister::RegisterBank bank);
+
+ enum Qualifiers : uint8_t {
+ kNoQualifiers = 0,
+ // Used by P registers.
+ kMerging,
+ kZeroing
+ };
+
+ // An unchecked constructor, for use by derived classes.
+ CPURegister(int code,
+ EncodedSize size,
+ RegisterBank bank,
+ EncodedSize lane_size,
+ Qualifiers qualifiers = kNoQualifiers)
+ : code_(code),
+ bank_(bank),
+ size_(size),
+ qualifiers_(qualifiers),
+ lane_size_(lane_size) {}
+
+ // TODO: Check that access to these fields is reasonably efficient.
+ uint8_t code_;
+ RegisterBank bank_;
+ EncodedSize size_;
+ Qualifiers qualifiers_;
+ EncodedSize lane_size_;
+};
+// Ensure that CPURegisters can fit in a single (64-bit) register. This is a
+// proxy for being "cheap to pass by value", which is hard to check directly.
+VIXL_STATIC_ASSERT(sizeof(CPURegister) <= sizeof(uint64_t));
+
+// TODO: Add constexpr constructors.
+#define VIXL_DECLARE_REGISTER_COMMON(NAME, REGISTER_TYPE, PARENT_TYPE) \
+ VIXL_CONSTEXPR NAME() : PARENT_TYPE() {} \
+ \
+ explicit NAME(CPURegister other) : PARENT_TYPE(other) { \
+ VIXL_ASSERT(IsValid()); \
+ } \
+ \
+ VIXL_CONSTEXPR static unsigned GetMaxCode() { \
+ return kNumberOf##REGISTER_TYPE##s - 1; \
+ }
+
+// Any W or X register, including the zero register and the stack pointer.
+class Register : public CPURegister {
+ public:
+ VIXL_DECLARE_REGISTER_COMMON(Register, Register, CPURegister)
+
+ Register(int code, int size_in_bits)
+ : CPURegister(code, size_in_bits, kRegister) {
+ VIXL_ASSERT(IsValidRegister());
+ }
+
+ bool IsValid() const { return IsValidRegister(); }
+};
+
+// Any FP or NEON V register, including vector (V.<T>) and scalar forms
+// (B, H, S, D, Q).
+class VRegister : public CPURegister {
+ public:
+ VIXL_DECLARE_REGISTER_COMMON(VRegister, VRegister, CPURegister)
+
+ // For historical reasons, VRegister(0) returns v0.1Q (or equivalently, q0).
+ explicit VRegister(int code, int size_in_bits = kQRegSize, int lanes = 1)
+ : CPURegister(code,
+ EncodeSizeInBits(size_in_bits),
+ kVRegisterBank,
+ EncodeLaneSizeInBits(size_in_bits, lanes)) {
+ VIXL_ASSERT(IsValidVRegister());
+ }
+
+ VRegister(int code, VectorFormat format)
+ : CPURegister(code,
+ EncodeSizeInBits(RegisterSizeInBitsFromFormat(format)),
+ kVRegisterBank,
+ EncodeSizeInBits(LaneSizeInBitsFromFormat(format)),
+ kNoQualifiers) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ VRegister V8B() const;
+ VRegister V16B() const;
+ VRegister V2H() const;
+ VRegister V4H() const;
+ VRegister V8H() const;
+ VRegister V2S() const;
+ VRegister V4S() const;
+ VRegister V1D() const;
+ VRegister V2D() const;
+ VRegister S4B() const;
+
+ bool IsValid() const { return IsValidVRegister(); }
+
+ protected:
+ static EncodedSize EncodeLaneSizeInBits(int size_in_bits, int lanes) {
+ VIXL_ASSERT(lanes >= 1);
+ VIXL_ASSERT((size_in_bits % lanes) == 0);
+ return EncodeSizeInBits(size_in_bits / lanes);
+ }
+};
+
+// Any SVE Z register, with or without a lane size specifier.
+class ZRegister : public CPURegister {
+ public:
+ VIXL_DECLARE_REGISTER_COMMON(ZRegister, ZRegister, CPURegister)
+
+ explicit ZRegister(int code, int lane_size_in_bits = kUnknownSize)
+ : CPURegister(code,
+ kEncodedUnknownSize,
+ kVRegisterBank,
+ EncodeSizeInBits(lane_size_in_bits)) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ ZRegister(int code, VectorFormat format)
+ : CPURegister(code,
+ kEncodedUnknownSize,
+ kVRegisterBank,
+ EncodeSizeInBits(LaneSizeInBitsFromFormat(format)),
+ kNoQualifiers) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ // Return a Z register with a known lane size (like "z0.B").
+ ZRegister VnB() const { return ZRegister(GetCode(), kBRegSize); }
+ ZRegister VnH() const { return ZRegister(GetCode(), kHRegSize); }
+ ZRegister VnS() const { return ZRegister(GetCode(), kSRegSize); }
+ ZRegister VnD() const { return ZRegister(GetCode(), kDRegSize); }
+ ZRegister VnQ() const { return ZRegister(GetCode(), kQRegSize); }
+
+ template <typename T>
+ ZRegister WithLaneSize(T format) const {
+ return ZRegister(GetCode(), format);
+ }
+
+ ZRegister WithSameLaneSizeAs(const CPURegister& other) const {
+ VIXL_ASSERT(other.HasLaneSize());
+ return this->WithLaneSize(other.GetLaneSizeInBits());
+ }
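+
+  // For example, ZRegister(0).VnD() corresponds to the assembly operand
+  // "z0.d", and z1.WithSameLaneSizeAs(z0.VnS()) produces "z1.s".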
+
+ bool IsValid() const { return IsValidZRegister(); }
+};
+
+// Any SVE P register, with or without a qualifier or lane size specifier.
+class PRegister : public CPURegister {
+ public:
+ VIXL_DECLARE_REGISTER_COMMON(PRegister, PRegister, CPURegister)
+
+ explicit PRegister(int code) : CPURegister(code, kUnknownSize, kPRegister) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ bool IsValid() const {
+ return IsValidPRegister() && !HasLaneSize() && IsUnqualified();
+ }
+
+ // Return a P register with a known lane size (like "p0.B").
+ PRegisterWithLaneSize VnB() const;
+ PRegisterWithLaneSize VnH() const;
+ PRegisterWithLaneSize VnS() const;
+ PRegisterWithLaneSize VnD() const;
+
+ template <typename T>
+ PRegisterWithLaneSize WithLaneSize(T format) const;
+
+ PRegisterWithLaneSize WithSameLaneSizeAs(const CPURegister& other) const;
+
+ // SVE predicates are specified (in normal assembly) with a "/z" (zeroing) or
+ // "/m" (merging) suffix. These methods are VIXL's equivalents.
+ PRegisterZ Zeroing() const;
+ PRegisterM Merging() const;
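+
+  // For example, the operand written "p2/m" in assembly is p2.Merging() (or
+  // equivalently PRegisterM(2)), and "p3/z" is p3.Zeroing() or PRegisterZ(3).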
+
+ protected:
+ // Unchecked constructors, for use by derived classes.
+ PRegister(int code, EncodedSize encoded_lane_size)
+ : CPURegister(code,
+ kEncodedUnknownSize,
+ kPRegisterBank,
+ encoded_lane_size,
+ kNoQualifiers) {}
+
+ PRegister(int code, Qualifiers qualifiers)
+ : CPURegister(code,
+ kEncodedUnknownSize,
+ kPRegisterBank,
+ kEncodedUnknownSize,
+ qualifiers) {}
+};
+
+// Any SVE P register with a known lane size (like "p0.B").
+class PRegisterWithLaneSize : public PRegister {
+ public:
+ VIXL_DECLARE_REGISTER_COMMON(PRegisterWithLaneSize, PRegister, PRegister)
+
+ PRegisterWithLaneSize(int code, int lane_size_in_bits)
+ : PRegister(code, EncodeSizeInBits(lane_size_in_bits)) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ PRegisterWithLaneSize(int code, VectorFormat format)
+ : PRegister(code, EncodeSizeInBits(LaneSizeInBitsFromFormat(format))) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ bool IsValid() const {
+ return IsValidPRegister() && HasLaneSize() && IsUnqualified();
+ }
+
+ // Overload lane size accessors so we can assert `HasLaneSize()`. This allows
+ // tools such as clang-tidy to prove that the result of GetLaneSize* is
+ // non-zero.
+
+ // TODO: Make these return 'int'.
+ unsigned GetLaneSizeInBits() const {
+ VIXL_ASSERT(HasLaneSize());
+ return PRegister::GetLaneSizeInBits();
+ }
+
+ unsigned GetLaneSizeInBytes() const {
+ VIXL_ASSERT(HasLaneSize());
+ return PRegister::GetLaneSizeInBytes();
+ }
+};
+
+// Any SVE P register with the zeroing qualifier (like "p0/z").
+class PRegisterZ : public PRegister {
+ public:
+ VIXL_DECLARE_REGISTER_COMMON(PRegisterZ, PRegister, PRegister)
+
+ explicit PRegisterZ(int code) : PRegister(code, kZeroing) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ bool IsValid() const {
+ return IsValidPRegister() && !HasLaneSize() && IsZeroing();
+ }
+};
+
+// Any SVE P register with the merging qualifier (like "p0/m").
+class PRegisterM : public PRegister {
+ public:
+ VIXL_DECLARE_REGISTER_COMMON(PRegisterM, PRegister, PRegister)
+
+ explicit PRegisterM(int code) : PRegister(code, kMerging) {
+ VIXL_ASSERT(IsValid());
+ }
+
+ bool IsValid() const {
+ return IsValidPRegister() && !HasLaneSize() && IsMerging();
+ }
+};
+
+inline PRegisterWithLaneSize PRegister::VnB() const {
+ return PRegisterWithLaneSize(GetCode(), kBRegSize);
+}
+inline PRegisterWithLaneSize PRegister::VnH() const {
+ return PRegisterWithLaneSize(GetCode(), kHRegSize);
+}
+inline PRegisterWithLaneSize PRegister::VnS() const {
+ return PRegisterWithLaneSize(GetCode(), kSRegSize);
+}
+inline PRegisterWithLaneSize PRegister::VnD() const {
+ return PRegisterWithLaneSize(GetCode(), kDRegSize);
+}
+
+template <typename T>
+inline PRegisterWithLaneSize PRegister::WithLaneSize(T format) const {
+ return PRegisterWithLaneSize(GetCode(), format);
+}
+
+inline PRegisterWithLaneSize PRegister::WithSameLaneSizeAs(
+ const CPURegister& other) const {
+ VIXL_ASSERT(other.HasLaneSize());
+ return this->WithLaneSize(other.GetLaneSizeInBits());
+}
+
+inline PRegisterZ PRegister::Zeroing() const { return PRegisterZ(GetCode()); }
+inline PRegisterM PRegister::Merging() const { return PRegisterM(GetCode()); }
+
+#define VIXL_REGISTER_WITH_SIZE_LIST(V) \
+ V(WRegister, kWRegSize, Register) \
+ V(XRegister, kXRegSize, Register) \
+ V(QRegister, kQRegSize, VRegister) \
+ V(DRegister, kDRegSize, VRegister) \
+ V(SRegister, kSRegSize, VRegister) \
+ V(HRegister, kHRegSize, VRegister) \
+ V(BRegister, kBRegSize, VRegister)
+
+#define VIXL_DEFINE_REGISTER_WITH_SIZE(NAME, SIZE, PARENT) \
+ class NAME : public PARENT { \
+ public: \
+ VIXL_CONSTEXPR NAME() : PARENT() {} \
+ explicit NAME(int code) : PARENT(code, SIZE) {} \
+ \
+ explicit NAME(PARENT other) : PARENT(other) { \
+ VIXL_ASSERT(GetSizeInBits() == SIZE); \
+ } \
+ \
+ PARENT As##PARENT() const { return *this; } \
+ \
+ VIXL_CONSTEXPR int GetSizeInBits() const { return SIZE; } \
+ \
+ bool IsValid() const { \
+ return PARENT::IsValid() && (PARENT::GetSizeInBits() == SIZE); \
+ } \
+ };
+
+VIXL_REGISTER_WITH_SIZE_LIST(VIXL_DEFINE_REGISTER_WITH_SIZE)
+
+// No*Reg is used to provide default values for unused arguments, error cases
+// and so on. Note that these (and the default constructors) all compare equal
+// (using the Is() method).
+const Register NoReg;
+const VRegister NoVReg;
+const CPURegister NoCPUReg;
+const ZRegister NoZReg;
+
+// TODO: Ideally, these would use specialised register types (like XRegister and
+// so on). However, doing so throws up template overloading problems elsewhere.
+#define VIXL_DEFINE_REGISTERS(N) \
+ const Register w##N = WRegister(N); \
+ const Register x##N = XRegister(N); \
+ const VRegister b##N = BRegister(N); \
+ const VRegister h##N = HRegister(N); \
+ const VRegister s##N = SRegister(N); \
+ const VRegister d##N = DRegister(N); \
+ const VRegister q##N = QRegister(N); \
+ const VRegister v##N(N); \
+ const ZRegister z##N(N);
+AARCH64_REGISTER_CODE_LIST(VIXL_DEFINE_REGISTERS)
+#undef VIXL_DEFINE_REGISTERS
+
+#define VIXL_DEFINE_P_REGISTERS(N) const PRegister p##N(N);
+AARCH64_P_REGISTER_CODE_LIST(VIXL_DEFINE_P_REGISTERS)
+#undef VIXL_DEFINE_P_REGISTERS
+
+// VIXL represents 'sp' with a unique code, to tell it apart from 'xzr'.
+const Register wsp = WRegister(kSPRegInternalCode);
+const Register sp = XRegister(kSPRegInternalCode);
+
+// Standard aliases.
+const Register ip0 = x16;
+const Register ip1 = x17;
+const Register lr = x30;
+const Register xzr = x31;
+const Register wzr = w31;
+
+// AreAliased returns true if any of the named registers overlap. Arguments
+// set to NoReg are ignored. The system stack pointer may be specified.
+bool AreAliased(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoReg,
+ const CPURegister& reg4 = NoReg,
+ const CPURegister& reg5 = NoReg,
+ const CPURegister& reg6 = NoReg,
+ const CPURegister& reg7 = NoReg,
+ const CPURegister& reg8 = NoReg);
+
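+// For example, AreAliased(x0, w0) is true because w0 overlaps the low half of
+// x0, whereas AreAliased(x0, x1) is false; trailing NoReg arguments have no
+// effect on the result.
+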
+// AreSameSizeAndType returns true if all of the specified registers have the
+// same size, and are of the same type. The system stack pointer may be
+// specified. Arguments set to NoReg are ignored, as are any subsequent
+// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
+bool AreSameSizeAndType(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoCPUReg,
+ const CPURegister& reg4 = NoCPUReg,
+ const CPURegister& reg5 = NoCPUReg,
+ const CPURegister& reg6 = NoCPUReg,
+ const CPURegister& reg7 = NoCPUReg,
+ const CPURegister& reg8 = NoCPUReg);
+
+// AreEven returns true if all of the specified registers have even register
+// indices. Arguments set to NoReg are ignored, as are any subsequent
+// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
+bool AreEven(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoReg,
+ const CPURegister& reg4 = NoReg,
+ const CPURegister& reg5 = NoReg,
+ const CPURegister& reg6 = NoReg,
+ const CPURegister& reg7 = NoReg,
+ const CPURegister& reg8 = NoReg);
+
+// AreConsecutive returns true if all of the specified registers are
+// consecutive in the register file. Arguments set to NoReg are ignored, as are
+// any subsequent arguments. At least one argument (reg1) must be valid
+// (not NoCPUReg).
+bool AreConsecutive(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoCPUReg,
+ const CPURegister& reg4 = NoCPUReg);
+
+// AreSameFormat returns true if all of the specified registers have the same
+// vector format. Arguments set to NoReg are ignored, as are any subsequent
+// arguments. At least one argument (reg1) must be valid (not NoVReg).
+bool AreSameFormat(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoCPUReg,
+ const CPURegister& reg4 = NoCPUReg);
+
+// AreSameLaneSize returns true if all of the specified registers have the same
+// element lane size, B, H, S or D. It doesn't compare the type of registers.
+// Arguments set to NoReg are ignored, as are any subsequent arguments.
+// At least one argument (reg1) must be valid (not NoVReg).
+// TODO: Remove this, and replace its uses with AreSameFormat.
+bool AreSameLaneSize(const CPURegister& reg1,
+ const CPURegister& reg2,
+ const CPURegister& reg3 = NoCPUReg,
+ const CPURegister& reg4 = NoCPUReg);
+}  // namespace aarch64
+}  // namespace vixl
+
+#endif // VIXL_AARCH64_REGISTERS_AARCH64_H_
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 855a2971..6d6d1677 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -26,6 +26,9 @@
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+#include <errno.h>
+#include <unistd.h>
+
#include <cmath>
#include <cstring>
#include <limits>
@@ -65,12 +68,13 @@ SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) {
Simulator::Simulator(Decoder* decoder, FILE* stream)
- : cpu_features_auditor_(decoder, CPUFeatures::All()) {
+ : movprfx_(NULL), cpu_features_auditor_(decoder, CPUFeatures::All()) {
// Ensure that shift operations act as the simulator expects.
VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1);
VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7fffffff);
- instruction_stats_ = false;
+ // Set up a dummy pipe for CanReadMemory.
+ VIXL_CHECK(pipe(dummy_pipe_fd_) == 0);
// Set up the decoder.
decoder_ = decoder;
@@ -91,6 +95,10 @@ Simulator::Simulator(Decoder* decoder, FILE* stream)
SetColouredTrace(false);
trace_parameters_ = LOG_NONE;
+ // We have to configure the SVE vector register length before calling
+ // ResetState().
+ SetVectorLengthInBits(kZRegMinSize);
+
ResetState();
// Allocate and set up the simulator stack.
@@ -105,8 +113,6 @@ Simulator::Simulator(Decoder* decoder, FILE* stream)
tos = AlignDown(tos, 16);
WriteSp(tos);
- instrumentation_ = NULL;
-
// Print a warning about exclusive-access instructions, but only the first
// time they are encountered. This warning can be silenced using
// SilenceExclusiveAccessWarning().
@@ -116,52 +122,111 @@ Simulator::Simulator(Decoder* decoder, FILE* stream)
// Initialize the common state of RNDR and RNDRRS.
uint16_t seed[3] = {11, 22, 33};
- VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rndr_state_));
- memcpy(rndr_state_, seed, sizeof(rndr_state_));
-}
+ VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rand_state_));
+ memcpy(rand_state_, seed, sizeof(rand_state_));
+ // Initialize all bits of pseudo predicate register to true.
+ LogicPRegister ones(pregister_all_true_);
+ ones.SetAllBits();
+}
-void Simulator::ResetState() {
+void Simulator::ResetSystemRegisters() {
// Reset the system registers.
nzcv_ = SimSystemRegister::DefaultValueFor(NZCV);
fpcr_ = SimSystemRegister::DefaultValueFor(FPCR);
+ ResetFFR();
+}
- // Reset registers to 0.
- pc_ = NULL;
- pc_modified_ = false;
+void Simulator::ResetRegisters() {
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
WriteXRegister(i, 0xbadbeef);
}
- // Set FP registers to a value that is a NaN in both 32-bit and 64-bit FP.
- uint64_t nan_bits[] = {
- UINT64_C(0x7ff00cab7f8ba9e1), UINT64_C(0x7ff0dead7f8beef1),
- };
- VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits[0] & kDRegMask)));
- VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits[0] & kSRegMask)));
+ // Returning to address 0 exits the Simulator.
+ WriteLr(kEndOfSimAddress);
+}
- qreg_t q_bits;
- VIXL_ASSERT(sizeof(q_bits) == sizeof(nan_bits));
- memcpy(&q_bits, nan_bits, sizeof(nan_bits));
+void Simulator::ResetVRegisters() {
+ // Set SVE/FP registers to a value that is a NaN in both 32-bit and 64-bit FP.
+ VIXL_ASSERT((GetVectorLengthInBytes() % kDRegSizeInBytes) == 0);
+ int lane_count = GetVectorLengthInBytes() / kDRegSizeInBytes;
+ for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
+ VIXL_ASSERT(vregisters_[i].GetSizeInBytes() == GetVectorLengthInBytes());
+ vregisters_[i].NotifyAccessAsZ();
+ for (int lane = 0; lane < lane_count; lane++) {
+ // Encode the register number and (D-sized) lane into each NaN, to
+ // make them easier to trace.
+ uint64_t nan_bits = 0x7ff0f0007f80f000 | (0x0000000100000000 * i) |
+ (0x0000000000000001 * lane);
+ VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask)));
+ VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask)));
+ vregisters_[i].Insert(lane, nan_bits);
+ }
+ }
+}
- for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
- WriteQRegister(i, q_bits);
+void Simulator::ResetPRegisters() {
+ VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
+ int lane_count = GetPredicateLengthInBytes() / kHRegSizeInBytes;
+ // Ensure the register configuration fits in this bit encoding.
+ VIXL_STATIC_ASSERT(kNumberOfPRegisters <= UINT8_MAX);
+ VIXL_ASSERT(lane_count <= UINT8_MAX);
+ for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
+ VIXL_ASSERT(pregisters_[i].GetSizeInBytes() == GetPredicateLengthInBytes());
+ for (int lane = 0; lane < lane_count; lane++) {
+ // Encode the register number and (H-sized) lane into each lane slot.
+ uint16_t bits = (0x0100 * lane) | i;
+ pregisters_[i].Insert(lane, bits);
+ }
}
- // Returning to address 0 exits the Simulator.
- WriteLr(kEndOfSimAddress);
+}
+
+void Simulator::ResetFFR() {
+ VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
+ int default_active_lanes = GetPredicateLengthInBytes() / kHRegSizeInBytes;
+ ffr_register_.Write(static_cast<uint16_t>(GetUintMask(default_active_lanes)));
+}
+
+void Simulator::ResetState() {
+ ResetSystemRegisters();
+ ResetRegisters();
+ ResetVRegisters();
+ ResetPRegisters();
+ pc_ = NULL;
+ pc_modified_ = false;
+
+ // BTI state.
btype_ = DefaultBType;
next_btype_ = DefaultBType;
}
+void Simulator::SetVectorLengthInBits(unsigned vector_length) {
+ VIXL_ASSERT((vector_length >= kZRegMinSize) &&
+ (vector_length <= kZRegMaxSize));
+ VIXL_ASSERT((vector_length % kZRegMinSize) == 0);
+ vector_length_ = vector_length;
+
+ for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
+ vregisters_[i].SetSizeInBytes(GetVectorLengthInBytes());
+ }
+ for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
+ pregisters_[i].SetSizeInBytes(GetPredicateLengthInBytes());
+ }
+
+ ffr_register_.SetSizeInBytes(GetPredicateLengthInBytes());
+
+ ResetVRegisters();
+ ResetPRegisters();
+ ResetFFR();
+}
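+
+// For example, a test that models a 256-bit SVE implementation can call
+// SetVectorLengthInBits(256) before running code; as asserted above, the
+// length must be a multiple of kZRegMinSize and lie within
+// [kZRegMinSize, kZRegMaxSize].
+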
Simulator::~Simulator() {
delete[] stack_;
// The decoder may outlive the simulator.
decoder_->RemoveVisitor(print_disasm_);
delete print_disasm_;
-
- decoder_->RemoveVisitor(instrumentation_);
- delete instrumentation_;
+ close(dummy_pipe_fd_[0]);
+ close(dummy_pipe_fd_[1]);
}
@@ -182,6 +247,7 @@ void Simulator::RunFrom(const Instruction* first) {
}
+// clang-format off
const char* Simulator::xreg_names[] = {"x0", "x1", "x2", "x3", "x4", "x5",
"x6", "x7", "x8", "x9", "x10", "x11",
"x12", "x13", "x14", "x15", "x16", "x17",
@@ -196,6 +262,13 @@ const char* Simulator::wreg_names[] = {"w0", "w1", "w2", "w3", "w4", "w5",
"w24", "w25", "w26", "w27", "w28", "w29",
"w30", "wzr", "wsp"};
+const char* Simulator::breg_names[] = {"b0", "b1", "b2", "b3", "b4", "b5",
+ "b6", "b7", "b8", "b9", "b10", "b11",
+ "b12", "b13", "b14", "b15", "b16", "b17",
+ "b18", "b19", "b20", "b21", "b22", "b23",
+ "b24", "b25", "b26", "b27", "b28", "b29",
+ "b30", "b31"};
+
const char* Simulator::hreg_names[] = {"h0", "h1", "h2", "h3", "h4", "h5",
"h6", "h7", "h8", "h9", "h10", "h11",
"h12", "h13", "h14", "h15", "h16", "h17",
@@ -224,27 +297,47 @@ const char* Simulator::vreg_names[] = {"v0", "v1", "v2", "v3", "v4", "v5",
"v24", "v25", "v26", "v27", "v28", "v29",
"v30", "v31"};
+const char* Simulator::zreg_names[] = {"z0", "z1", "z2", "z3", "z4", "z5",
+ "z6", "z7", "z8", "z9", "z10", "z11",
+ "z12", "z13", "z14", "z15", "z16", "z17",
+ "z18", "z19", "z20", "z21", "z22", "z23",
+ "z24", "z25", "z26", "z27", "z28", "z29",
+ "z30", "z31"};
+
+const char* Simulator::preg_names[] = {"p0", "p1", "p2", "p3", "p4", "p5",
+ "p6", "p7", "p8", "p9", "p10", "p11",
+ "p12", "p13", "p14", "p15"};
+// clang-format on
+
const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) {
- VIXL_ASSERT(code < kNumberOfRegisters);
// If the code represents the stack pointer, index the name after zr.
- if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) {
+ if ((code == kSPRegInternalCode) ||
+ ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
code = kZeroRegCode + 1;
}
+ VIXL_ASSERT(code < ArrayLength(wreg_names));
return wreg_names[code];
}
const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) {
- VIXL_ASSERT(code < kNumberOfRegisters);
// If the code represents the stack pointer, index the name after zr.
- if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) {
+ if ((code == kSPRegInternalCode) ||
+ ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
code = kZeroRegCode + 1;
}
+ VIXL_ASSERT(code < ArrayLength(xreg_names));
return xreg_names[code];
}
+const char* Simulator::BRegNameForCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfVRegisters);
+ return breg_names[code];
+}
+
+
const char* Simulator::HRegNameForCode(unsigned code) {
VIXL_ASSERT(code < kNumberOfVRegisters);
return hreg_names[code];
@@ -269,6 +362,39 @@ const char* Simulator::VRegNameForCode(unsigned code) {
}
+const char* Simulator::ZRegNameForCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfZRegisters);
+ return zreg_names[code];
+}
+
+
+const char* Simulator::PRegNameForCode(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfPRegisters);
+ return preg_names[code];
+}
+
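+// Expand a predicate register into a byte-granular vector mask: lanes whose
+// governing predicate bit is set become 0xff; all other lanes become zero.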
+SimVRegister Simulator::ExpandToSimVRegister(const SimPRegister& pg) {
+ SimVRegister ones, result;
+ dup_immediate(kFormatVnB, ones, 0xff);
+ mov_zeroing(kFormatVnB, result, pg, ones);
+ return result;
+}
+
+void Simulator::ExtractFromSimVRegister(VectorFormat vform,
+ SimPRegister& pd,
+ SimVRegister vreg) {
+ SimVRegister zero;
+ dup_immediate(kFormatVnB, zero, 0);
+ SVEIntCompareVectorsHelper(ne,
+ vform,
+ pd,
+ GetPTrue(),
+ vreg,
+ zero,
+ false,
+ LeaveFlags);
+}
+
#define COLOUR(colour_code) "\033[0;" colour_code "m"
#define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m"
#define COLOUR_HIGHLIGHT "\033[43m"
@@ -291,6 +417,8 @@ void Simulator::SetColouredTrace(bool value) {
clr_reg_value = value ? COLOUR(CYAN) : "";
clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : "";
clr_vreg_value = value ? COLOUR(MAGENTA) : "";
+ clr_preg_name = value ? COLOUR_BOLD(GREEN) : "";
+ clr_preg_value = value ? COLOUR(GREEN) : "";
clr_memory_address = value ? COLOUR_BOLD(BLUE) : "";
clr_warning = value ? COLOUR_BOLD(YELLOW) : "";
clr_warning_message = value ? COLOUR(YELLOW) : "";
@@ -322,22 +450,6 @@ void Simulator::SetTraceParameters(int parameters) {
}
-void Simulator::SetInstructionStats(bool value) {
- if (value != instruction_stats_) {
- if (value) {
- if (instrumentation_ == NULL) {
- // Set the sample period to 10, as the VIXL examples and tests are
- // short.
- instrumentation_ = new Instrument("vixl_stats.csv", 10);
- }
- decoder_->AppendVisitor(instrumentation_);
- } else if (instrumentation_ != NULL) {
- decoder_->RemoveVisitor(instrumentation_);
- }
- instruction_stats_ = value;
- }
-}
-
// Helpers ---------------------------------------------------------------------
uint64_t Simulator::AddWithCarry(unsigned reg_size,
bool set_flags,
@@ -379,44 +491,50 @@ uint64_t Simulator::AddWithCarry(unsigned reg_size,
int64_t Simulator::ShiftOperand(unsigned reg_size,
- int64_t value,
+ uint64_t uvalue,
Shift shift_type,
unsigned amount) const {
- VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
- if (amount == 0) {
- return value;
- }
- uint64_t uvalue = static_cast<uint64_t>(value);
- uint64_t mask = kWRegMask;
- bool is_negative = (uvalue & kWSignMask) != 0;
- if (reg_size == kXRegSize) {
- mask = kXRegMask;
- is_negative = (uvalue & kXSignMask) != 0;
- }
-
- switch (shift_type) {
- case LSL:
- uvalue <<= amount;
- break;
- case LSR:
- uvalue >>= amount;
- break;
- case ASR:
- uvalue >>= amount;
- if (is_negative) {
- // Simulate sign-extension to 64 bits.
- uvalue |= ~UINT64_C(0) << (reg_size - amount);
+ VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
+ (reg_size == kSRegSize) || (reg_size == kDRegSize));
+ if (amount > 0) {
+ uint64_t mask = GetUintMask(reg_size);
+ bool is_negative = (uvalue & GetSignMask(reg_size)) != 0;
+    // The behavior is undefined in C++ if the shift amount is greater than
+    // or equal to the register lane size. Work out the shifted result based
+    // on architectural behavior before performing the C++ shift operations.
+ switch (shift_type) {
+ case LSL:
+ if (amount >= reg_size) {
+ return UINT64_C(0);
+ }
+ uvalue <<= amount;
+ break;
+ case LSR:
+ if (amount >= reg_size) {
+ return UINT64_C(0);
+ }
+ uvalue >>= amount;
+ break;
+ case ASR:
+ if (amount >= reg_size) {
+ return is_negative ? ~UINT64_C(0) : UINT64_C(0);
+ }
+ uvalue >>= amount;
+ if (is_negative) {
+ // Simulate sign-extension to 64 bits.
+ uvalue |= ~UINT64_C(0) << (reg_size - amount);
+ }
+ break;
+ case ROR: {
+ uvalue = RotateRight(uvalue, amount, reg_size);
+ break;
}
- break;
- case ROR: {
- uvalue = RotateRight(uvalue, amount, reg_size);
- break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ return 0;
}
- default:
- VIXL_UNIMPLEMENTED();
- return 0;
+ uvalue &= mask;
}
- uvalue &= mask;
int64_t result;
memcpy(&result, &uvalue, sizeof(result));
@@ -592,6 +710,15 @@ Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
return kPrintReg1S;
case kFormatD:
return kPrintReg1D;
+
+ case kFormatVnB:
+ return kPrintRegVnB;
+ case kFormatVnH:
+ return kPrintRegVnH;
+ case kFormatVnS:
+ return kPrintRegVnS;
+ case kFormatVnD:
+ return kPrintRegVnD;
}
}
@@ -623,301 +750,445 @@ Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatFP(
}
}
-
-void Simulator::PrintWrittenRegisters() {
+void Simulator::PrintRegisters() {
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
- if (registers_[i].WrittenSinceLastLog()) PrintRegister(i);
+ if (i == kSpRegCode) i = kSPRegInternalCode;
+ PrintRegister(i);
}
}
-
-void Simulator::PrintWrittenVRegisters() {
+void Simulator::PrintVRegisters() {
for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
- // At this point there is no type information, so print as a raw 1Q.
- if (vregisters_[i].WrittenSinceLastLog()) PrintVRegister(i, kPrintReg1Q);
+ PrintVRegister(i);
}
}
-
-void Simulator::PrintSystemRegisters() {
- PrintSystemRegister(NZCV);
- PrintSystemRegister(FPCR);
+void Simulator::PrintZRegisters() {
+ for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
+ PrintZRegister(i);
+ }
}
-
-void Simulator::PrintRegisters() {
+void Simulator::PrintWrittenRegisters() {
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
- PrintRegister(i);
+ if (registers_[i].WrittenSinceLastLog()) {
+ if (i == kSpRegCode) i = kSPRegInternalCode;
+ PrintRegister(i);
+ }
}
}
-
-void Simulator::PrintVRegisters() {
+void Simulator::PrintWrittenVRegisters() {
+ bool has_sve = GetCPUFeatures()->Has(CPUFeatures::kSVE);
for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
- // At this point there is no type information, so print as a raw 1Q.
- PrintVRegister(i, kPrintReg1Q);
+ if (vregisters_[i].WrittenSinceLastLog()) {
+ // Z registers are initialised in the constructor before the user can
+ // configure the CPU features, so we must also check for SVE here.
+ if (vregisters_[i].AccessedAsZSinceLastLog() && has_sve) {
+ PrintZRegister(i);
+ } else {
+ PrintVRegister(i);
+ }
+ }
}
}
-
-// Print a register's name and raw value.
-//
-// Only the least-significant `size_in_bytes` bytes of the register are printed,
-// but the value is aligned as if the whole register had been printed.
-//
-// For typical register updates, size_in_bytes should be set to kXRegSizeInBytes
-// -- the default -- so that the whole register is printed. Other values of
-// size_in_bytes are intended for use when the register hasn't actually been
-// updated (such as in PrintWrite).
-//
-// No newline is printed. This allows the caller to print more details (such as
-// a memory access annotation).
-void Simulator::PrintRegisterRawHelper(unsigned code,
- Reg31Mode r31mode,
- int size_in_bytes) {
- // The template for all supported sizes.
- // "# x{code}: 0xffeeddccbbaa9988"
- // "# w{code}: 0xbbaa9988"
- // "# w{code}<15:0>: 0x9988"
- // "# w{code}<7:0>: 0x88"
- unsigned padding_chars = (kXRegSizeInBytes - size_in_bytes) * 2;
-
- const char* name = "";
- const char* suffix = "";
- switch (size_in_bytes) {
- case kXRegSizeInBytes:
- name = XRegNameForCode(code, r31mode);
- break;
- case kWRegSizeInBytes:
- name = WRegNameForCode(code, r31mode);
- break;
- case 2:
- name = WRegNameForCode(code, r31mode);
- suffix = "<15:0>";
- padding_chars -= strlen(suffix);
- break;
- case 1:
- name = WRegNameForCode(code, r31mode);
- suffix = "<7:0>";
- padding_chars -= strlen(suffix);
- break;
- default:
- VIXL_UNREACHABLE();
- }
- fprintf(stream_, "# %s%5s%s: ", clr_reg_name, name, suffix);
-
- // Print leading padding spaces.
- VIXL_ASSERT(padding_chars < (kXRegSizeInBytes * 2));
- for (unsigned i = 0; i < padding_chars; i++) {
- putc(' ', stream_);
+void Simulator::PrintWrittenPRegisters() {
+ // P registers are initialised in the constructor before the user can
+ // configure the CPU features, so we must check for SVE here.
+ if (!GetCPUFeatures()->Has(CPUFeatures::kSVE)) return;
+ for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
+ if (pregisters_[i].WrittenSinceLastLog()) {
+ PrintPRegister(i);
+ }
}
-
- // Print the specified bits in hexadecimal format.
- uint64_t bits = ReadRegister<uint64_t>(code, r31mode);
- bits &= kXRegMask >> ((kXRegSizeInBytes - size_in_bytes) * 8);
- VIXL_STATIC_ASSERT(sizeof(bits) == kXRegSizeInBytes);
-
- int chars = size_in_bytes * 2;
- fprintf(stream_,
- "%s0x%0*" PRIx64 "%s",
- clr_reg_value,
- chars,
- bits,
- clr_normal);
+ if (ReadFFR().WrittenSinceLastLog()) PrintFFR();
}
-
-void Simulator::PrintRegister(unsigned code, Reg31Mode r31mode) {
- registers_[code].NotifyRegisterLogged();
-
- // Don't print writes into xzr.
- if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) {
- return;
- }
-
- // The template for all x and w registers:
- // "# x{code}: 0x{value}"
- // "# w{code}: 0x{value}"
-
- PrintRegisterRawHelper(code, r31mode);
- fprintf(stream_, "\n");
+void Simulator::PrintSystemRegisters() {
+ PrintSystemRegister(NZCV);
+ PrintSystemRegister(FPCR);
}
-
-// Print a register's name and raw value.
-//
-// The `bytes` and `lsb` arguments can be used to limit the bytes that are
-// printed. These arguments are intended for use in cases where register hasn't
-// actually been updated (such as in PrintVWrite).
-//
-// No newline is printed. This allows the caller to print more details (such as
-// a floating-point interpretation or a memory access annotation).
-void Simulator::PrintVRegisterRawHelper(unsigned code, int bytes, int lsb) {
- // The template for vector types:
- // "# v{code}: 0xffeeddccbbaa99887766554433221100".
- // An example with bytes=4 and lsb=8:
- // "# v{code}: 0xbbaa9988 ".
- fprintf(stream_,
- "# %s%5s: %s",
- clr_vreg_name,
- VRegNameForCode(code),
- clr_vreg_value);
-
- int msb = lsb + bytes - 1;
- int byte = kQRegSizeInBytes - 1;
-
- // Print leading padding spaces. (Two spaces per byte.)
- while (byte > msb) {
+void Simulator::PrintRegisterValue(const uint8_t* value,
+ int value_size,
+ PrintRegisterFormat format) {
+ int print_width = GetPrintRegSizeInBytes(format);
+ VIXL_ASSERT(print_width <= value_size);
+ for (int i = value_size - 1; i >= print_width; i--) {
+ // Pad with spaces so that values align vertically.
fprintf(stream_, " ");
- byte--;
+ // If we aren't explicitly printing a partial value, ensure that the
+ // unprinted bits are zero.
+ VIXL_ASSERT(((format & kPrintRegPartial) != 0) || (value[i] == 0));
}
-
- // Print the specified part of the value, byte by byte.
- qreg_t rawbits = ReadQRegister(code);
fprintf(stream_, "0x");
- while (byte >= lsb) {
- fprintf(stream_, "%02x", rawbits.val[byte]);
- byte--;
+ for (int i = print_width - 1; i >= 0; i--) {
+ fprintf(stream_, "%02x", value[i]);
}
+}
- // Print trailing padding spaces.
- while (byte >= 0) {
- fprintf(stream_, " ");
- byte--;
+void Simulator::PrintRegisterValueFPAnnotations(const uint8_t* value,
+ uint16_t lane_mask,
+ PrintRegisterFormat format) {
+ VIXL_ASSERT((format & kPrintRegAsFP) != 0);
+ int lane_size = GetPrintRegLaneSizeInBytes(format);
+ fprintf(stream_, " (");
+ bool last_inactive = false;
+ const char* sep = "";
+ for (int i = GetPrintRegLaneCount(format) - 1; i >= 0; i--, sep = ", ") {
+ bool access = (lane_mask & (1 << (i * lane_size))) != 0;
+ if (access) {
+ // Read the lane as a double, so we can format all FP types in the same
+ // way. We squash NaNs, and a double can exactly represent any other value
+ // that the smaller types can represent, so this is lossless.
+ double element;
+ switch (lane_size) {
+ case kHRegSizeInBytes: {
+ Float16 element_fp16;
+ VIXL_STATIC_ASSERT(sizeof(element_fp16) == kHRegSizeInBytes);
+ memcpy(&element_fp16, &value[i * lane_size], sizeof(element_fp16));
+ element = FPToDouble(element_fp16, kUseDefaultNaN);
+ break;
+ }
+ case kSRegSizeInBytes: {
+ float element_fp32;
+ memcpy(&element_fp32, &value[i * lane_size], sizeof(element_fp32));
+ element = static_cast<double>(element_fp32);
+ break;
+ }
+ case kDRegSizeInBytes: {
+ memcpy(&element, &value[i * lane_size], sizeof(element));
+ break;
+ }
+ default:
+ VIXL_UNREACHABLE();
+ fprintf(stream_, "{UnknownFPValue}");
+ continue;
+ }
+ if (IsNaN(element)) {
+ // The fprintf behaviour for NaNs is implementation-defined. Always
+ // print "nan", so that traces are consistent.
+ fprintf(stream_, "%s%snan%s", sep, clr_vreg_value, clr_normal);
+ } else {
+ fprintf(stream_,
+ "%s%s%#.4g%s",
+ sep,
+ clr_vreg_value,
+ element,
+ clr_normal);
+ }
+ last_inactive = false;
+ } else if (!last_inactive) {
+ // Replace each contiguous sequence of inactive lanes with "...".
+ fprintf(stream_, "%s...", sep);
+ last_inactive = true;
+ }
}
- fprintf(stream_, "%s", clr_normal);
+ fprintf(stream_, ")");
}
+void Simulator::PrintRegister(int code,
+ PrintRegisterFormat format,
+ const char* suffix) {
+ VIXL_ASSERT((static_cast<unsigned>(code) < kNumberOfRegisters) ||
+ (static_cast<unsigned>(code) == kSPRegInternalCode));
+ VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsScalar);
+ VIXL_ASSERT((format & kPrintRegAsFP) == 0);
-// Print each of the specified lanes of a register as a float or double value.
-//
-// The `lane_count` and `lslane` arguments can be used to limit the lanes that
-// are printed. These arguments are intended for use in cases where register
-// hasn't actually been updated (such as in PrintVWrite).
-//
-// No newline is printed. This allows the caller to print more details (such as
-// a memory access annotation).
-void Simulator::PrintVRegisterFPHelper(unsigned code,
- unsigned lane_size_in_bytes,
- int lane_count,
- int rightmost_lane) {
- VIXL_ASSERT((lane_size_in_bytes == kHRegSizeInBytes) ||
- (lane_size_in_bytes == kSRegSizeInBytes) ||
- (lane_size_in_bytes == kDRegSizeInBytes));
-
- unsigned msb = ((lane_count + rightmost_lane) * lane_size_in_bytes);
- VIXL_ASSERT(msb <= kQRegSizeInBytes);
-
- // For scalar types ((lane_count == 1) && (rightmost_lane == 0)), a register
- // name is used:
- // " (h{code}: {value})"
- // " (s{code}: {value})"
- // " (d{code}: {value})"
- // For vector types, "..." is used to represent one or more omitted lanes.
- // " (..., {value}, {value}, ...)"
- if (lane_size_in_bytes == kHRegSizeInBytes) {
- // TODO: Trace tests will fail until we regenerate them.
- return;
- }
- if ((lane_count == 1) && (rightmost_lane == 0)) {
- const char* name;
- switch (lane_size_in_bytes) {
- case kHRegSizeInBytes:
- name = HRegNameForCode(code);
+ SimRegister* reg;
+ SimRegister zero;
+ if (code == kZeroRegCode) {
+ reg = &zero;
+ } else {
+ // registers_[31] holds the SP.
+ VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
+ reg = &registers_[code % kNumberOfRegisters];
+ }
+
+ // We trace register writes as whole register values, implying that any
+ // unprinted bits are all zero:
+ // "# x{code}: 0x{-----value----}"
+ // "# w{code}: 0x{-value}"
+ // Stores trace partial register values, implying nothing about the unprinted
+ // bits:
+ // "# x{code}<63:0>: 0x{-----value----}"
+ // "# x{code}<31:0>: 0x{-value}"
+ // "# x{code}<15:0>: 0x{--}"
+ // "# x{code}<7:0>: 0x{}"
+
+ bool is_partial = (format & kPrintRegPartial) != 0;
+ unsigned print_reg_size = GetPrintRegSizeInBits(format);
+ std::stringstream name;
+ if (is_partial) {
+ name << XRegNameForCode(code) << GetPartialRegSuffix(format);
+ } else {
+ // Notify the register that it has been logged, but only if we're printing
+ // all of it.
+ reg->NotifyRegisterLogged();
+ switch (print_reg_size) {
+ case kWRegSize:
+ name << WRegNameForCode(code);
break;
- case kSRegSizeInBytes:
- name = SRegNameForCode(code);
- break;
- case kDRegSizeInBytes:
- name = DRegNameForCode(code);
+ case kXRegSize:
+ name << XRegNameForCode(code);
break;
default:
- name = NULL;
VIXL_UNREACHABLE();
- }
- fprintf(stream_, " (%s%s: ", clr_vreg_name, name);
- } else {
- if (msb < (kQRegSizeInBytes - 1)) {
- fprintf(stream_, " (..., ");
- } else {
- fprintf(stream_, " (");
+ return;
}
}
- // Print the list of values.
- const char* separator = "";
- int leftmost_lane = rightmost_lane + lane_count - 1;
- for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) {
- double value;
- switch (lane_size_in_bytes) {
- case kHRegSizeInBytes:
- value = ReadVRegister(code).GetLane<uint16_t>(lane);
+ fprintf(stream_,
+ "# %s%*s: %s",
+ clr_reg_name,
+ kPrintRegisterNameFieldWidth,
+ name.str().c_str(),
+ clr_reg_value);
+ PrintRegisterValue(*reg, format);
+ fprintf(stream_, "%s%s", clr_normal, suffix);
+}
+
+void Simulator::PrintVRegister(int code,
+ PrintRegisterFormat format,
+ const char* suffix) {
+ VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfVRegisters);
+ VIXL_ASSERT(((format & kPrintRegAsVectorMask) == kPrintRegAsScalar) ||
+ ((format & kPrintRegAsVectorMask) == kPrintRegAsDVector) ||
+ ((format & kPrintRegAsVectorMask) == kPrintRegAsQVector));
+
+ // We trace register writes as whole register values, implying that any
+ // unprinted bits are all zero:
+ // "# v{code}: 0x{-------------value------------}"
+ // "# d{code}: 0x{-----value----}"
+ // "# s{code}: 0x{-value}"
+ // "# h{code}: 0x{--}"
+ // "# b{code}: 0x{}"
+ // Stores trace partial register values, implying nothing about the unprinted
+ // bits:
+ // "# v{code}<127:0>: 0x{-------------value------------}"
+ // "# v{code}<63:0>: 0x{-----value----}"
+ // "# v{code}<31:0>: 0x{-value}"
+ // "# v{code}<15:0>: 0x{--}"
+ // "# v{code}<7:0>: 0x{}"
+
+ bool is_partial = ((format & kPrintRegPartial) != 0);
+ std::stringstream name;
+ unsigned print_reg_size = GetPrintRegSizeInBits(format);
+ if (is_partial) {
+ name << VRegNameForCode(code) << GetPartialRegSuffix(format);
+ } else {
+ // Notify the register that it has been logged, but only if we're printing
+ // all of it.
+ vregisters_[code].NotifyRegisterLogged();
+ switch (print_reg_size) {
+ case kBRegSize:
+ name << BRegNameForCode(code);
break;
- case kSRegSizeInBytes:
- value = ReadVRegister(code).GetLane<float>(lane);
+ case kHRegSize:
+ name << HRegNameForCode(code);
break;
- case kDRegSizeInBytes:
- value = ReadVRegister(code).GetLane<double>(lane);
+ case kSRegSize:
+ name << SRegNameForCode(code);
+ break;
+ case kDRegSize:
+ name << DRegNameForCode(code);
+ break;
+ case kQRegSize:
+ name << VRegNameForCode(code);
break;
default:
- value = 0.0;
VIXL_UNREACHABLE();
+ return;
}
- if (IsNaN(value)) {
- // The output for NaNs is implementation defined. Always print `nan`, so
- // that traces are coherent across different implementations.
- fprintf(stream_, "%s%snan%s", separator, clr_vreg_value, clr_normal);
- } else {
- fprintf(stream_,
- "%s%s%#g%s",
- separator,
- clr_vreg_value,
- value,
- clr_normal);
- }
- separator = ", ";
}
- if (rightmost_lane > 0) {
- fprintf(stream_, ", ...");
+ fprintf(stream_,
+ "# %s%*s: %s",
+ clr_vreg_name,
+ kPrintRegisterNameFieldWidth,
+ name.str().c_str(),
+ clr_vreg_value);
+ PrintRegisterValue(vregisters_[code], format);
+ fprintf(stream_, "%s", clr_normal);
+ if ((format & kPrintRegAsFP) != 0) {
+ PrintRegisterValueFPAnnotations(vregisters_[code], format);
+ }
+ fprintf(stream_, "%s", suffix);
+}
+
+void Simulator::PrintVRegistersForStructuredAccess(int rt_code,
+ int reg_count,
+ uint16_t focus_mask,
+ PrintRegisterFormat format) {
+ bool print_fp = (format & kPrintRegAsFP) != 0;
+ // Suppress FP formatting, so we can specify the lanes we're interested in.
+ PrintRegisterFormat format_no_fp =
+ static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
+
+ for (int r = 0; r < reg_count; r++) {
+ int code = (rt_code + r) % kNumberOfVRegisters;
+ PrintVRegister(code, format_no_fp, "");
+ if (print_fp) {
+ PrintRegisterValueFPAnnotations(vregisters_[code], focus_mask, format);
+ }
+ fprintf(stream_, "\n");
}
- fprintf(stream_, ")");
}
+void Simulator::PrintZRegistersForStructuredAccess(int rt_code,
+ int q_index,
+ int reg_count,
+ uint16_t focus_mask,
+ PrintRegisterFormat format) {
+ bool print_fp = (format & kPrintRegAsFP) != 0;
+ // Suppress FP formatting, so we can specify the lanes we're interested in.
+ PrintRegisterFormat format_no_fp =
+ static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
+
+ PrintRegisterFormat format_q = GetPrintRegAsQChunkOfSVE(format);
+
+ const unsigned size = kQRegSizeInBytes;
+ unsigned byte_index = q_index * size;
+ const uint8_t* value = vregisters_[rt_code].GetBytes() + byte_index;
+ VIXL_ASSERT((byte_index + size) <= vregisters_[rt_code].GetSizeInBytes());
+
+ for (int r = 0; r < reg_count; r++) {
+ int code = (rt_code + r) % kNumberOfZRegisters;
+ PrintPartialZRegister(code, q_index, format_no_fp, "");
+ if (print_fp) {
+ PrintRegisterValueFPAnnotations(value, focus_mask, format_q);
+ }
+ fprintf(stream_, "\n");
+ }
+}
-void Simulator::PrintVRegister(unsigned code, PrintRegisterFormat format) {
+void Simulator::PrintZRegister(int code, PrintRegisterFormat format) {
+ // We're going to print the register in parts, so force a partial format.
+ format = GetPrintRegPartial(format);
+ VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
+ int vl = GetVectorLengthInBits();
+ VIXL_ASSERT((vl % kQRegSize) == 0);
+ for (unsigned i = 0; i < (vl / kQRegSize); i++) {
+ PrintPartialZRegister(code, i, format);
+ }
vregisters_[code].NotifyRegisterLogged();
+}
- int lane_size_log2 = format & kPrintRegLaneSizeMask;
+void Simulator::PrintPRegister(int code, PrintRegisterFormat format) {
+ // We're going to print the register in parts, so force a partial format.
+ format = GetPrintRegPartial(format);
+ VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
+ int vl = GetVectorLengthInBits();
+ VIXL_ASSERT((vl % kQRegSize) == 0);
+ for (unsigned i = 0; i < (vl / kQRegSize); i++) {
+ PrintPartialPRegister(code, i, format);
+ }
+ pregisters_[code].NotifyRegisterLogged();
+}
- int reg_size_log2;
- if (format & kPrintRegAsQVector) {
- reg_size_log2 = kQRegSizeInBytesLog2;
- } else if (format & kPrintRegAsDVector) {
- reg_size_log2 = kDRegSizeInBytesLog2;
- } else {
- // Scalar types.
- reg_size_log2 = lane_size_log2;
+void Simulator::PrintFFR(PrintRegisterFormat format) {
+ // We're going to print the register in parts, so force a partial format.
+ format = GetPrintRegPartial(format);
+ VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
+ int vl = GetVectorLengthInBits();
+ VIXL_ASSERT((vl % kQRegSize) == 0);
+ SimPRegister& ffr = ReadFFR();
+ for (unsigned i = 0; i < (vl / kQRegSize); i++) {
+ PrintPartialPRegister("FFR", ffr, i, format);
}
+ ffr.NotifyRegisterLogged();
+}
+
+void Simulator::PrintPartialZRegister(int code,
+ int q_index,
+ PrintRegisterFormat format,
+ const char* suffix) {
+ VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfZRegisters);
+ VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
+ VIXL_ASSERT((format & kPrintRegPartial) != 0);
+ VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
- int lane_count = 1 << (reg_size_log2 - lane_size_log2);
- int lane_size = 1 << lane_size_log2;
+ // We _only_ trace partial Z register values in Q-sized chunks, because
+ // they're often too large to reasonably fit on a single line. Each line
+ // implies nothing about the unprinted bits.
+ // "# z{code}<127:0>: 0x{-------------value------------}"
- // The template for vector types:
- // "# v{code}: 0x{rawbits} (..., {value}, ...)".
- // The template for scalar types:
- // "# v{code}: 0x{rawbits} ({reg}:{value})".
- // The values in parentheses after the bit representations are floating-point
- // interpretations. They are displayed only if the kPrintVRegAsFP bit is set.
+ format = GetPrintRegAsQChunkOfSVE(format);
- PrintVRegisterRawHelper(code);
- if (format & kPrintRegAsFP) {
- PrintVRegisterFPHelper(code, lane_size, lane_count);
+ const unsigned size = kQRegSizeInBytes;
+ unsigned byte_index = q_index * size;
+ const uint8_t* value = vregisters_[code].GetBytes() + byte_index;
+ VIXL_ASSERT((byte_index + size) <= vregisters_[code].GetSizeInBytes());
+
+ int lsb = q_index * kQRegSize;
+ int msb = lsb + kQRegSize - 1;
+ std::stringstream name;
+ name << ZRegNameForCode(code) << '<' << msb << ':' << lsb << '>';
+
+ fprintf(stream_,
+ "# %s%*s: %s",
+ clr_vreg_name,
+ kPrintRegisterNameFieldWidth,
+ name.str().c_str(),
+ clr_vreg_value);
+ PrintRegisterValue(value, size, format);
+ fprintf(stream_, "%s", clr_normal);
+ if ((format & kPrintRegAsFP) != 0) {
+ PrintRegisterValueFPAnnotations(value, GetPrintRegLaneMask(format), format);
}
+ fprintf(stream_, "%s", suffix);
+}
+
+void Simulator::PrintPartialPRegister(const char* name,
+ const SimPRegister& reg,
+ int q_index,
+ PrintRegisterFormat format,
+ const char* suffix) {
+ VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
+ VIXL_ASSERT((format & kPrintRegPartial) != 0);
+ VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
- fprintf(stream_, "\n");
+ // We don't currently use the format for anything here.
+ USE(format);
+
+ // We _only_ trace partial P register values, because they're often too large
+ // to reasonably fit on a single line. Each line implies nothing about the
+ // unprinted bits.
+ //
+ // We print values in binary, with spaces between each bit, in order for the
+ // bits to align with the Z register bytes that they predicate.
+ // "# {name}<15:0>: 0b{-------------value------------}"
+
+ int print_size_in_bits = kQRegSize / kZRegBitsPerPRegBit;
+ int lsb = q_index * print_size_in_bits;
+ int msb = lsb + print_size_in_bits - 1;
+ std::stringstream prefix;
+ prefix << name << '<' << msb << ':' << lsb << '>';
+
+ fprintf(stream_,
+ "# %s%*s: %s0b",
+ clr_preg_name,
+ kPrintRegisterNameFieldWidth,
+ prefix.str().c_str(),
+ clr_preg_value);
+ for (int i = msb; i >= lsb; i--) {
+ fprintf(stream_, " %c", reg.GetBit(i) ? '1' : '0');
+ }
+ fprintf(stream_, "%s%s", clr_normal, suffix);
}
+void Simulator::PrintPartialPRegister(int code,
+ int q_index,
+ PrintRegisterFormat format,
+ const char* suffix) {
+ VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfPRegisters);
+ PrintPartialPRegister(PRegNameForCode(code),
+ pregisters_[code],
+ q_index,
+ format,
+ suffix);
+}
void Simulator::PrintSystemRegister(SystemRegister id) {
switch (id) {
@@ -954,90 +1225,347 @@ void Simulator::PrintSystemRegister(SystemRegister id) {
}
}
-
-void Simulator::PrintRead(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format) {
- registers_[reg_code].NotifyRegisterLogged();
-
- USE(format);
-
- // The template is "# {reg}: 0x{value} <- {address}".
- PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister);
+uint16_t Simulator::PrintPartialAccess(uint16_t access_mask,
+ uint16_t future_access_mask,
+ int struct_element_count,
+ int lane_size_in_bytes,
+ const char* op,
+ uintptr_t address,
+ int reg_size_in_bytes) {
+ // We want to assume that we'll access at least one lane.
+ VIXL_ASSERT(access_mask != 0);
+ VIXL_ASSERT((reg_size_in_bytes == kXRegSizeInBytes) ||
+ (reg_size_in_bytes == kQRegSizeInBytes));
+ bool started_annotation = false;
+ // Indent to match the register field, the fixed formatting, and the value
+ // prefix ("0x"): "# {name}: 0x"
+ fprintf(stream_, "# %*s ", kPrintRegisterNameFieldWidth, "");
+ // First, annotate the lanes (byte by byte).
+ for (int lane = reg_size_in_bytes - 1; lane >= 0; lane--) {
+ bool access = (access_mask & (1 << lane)) != 0;
+ bool future = (future_access_mask & (1 << lane)) != 0;
+ if (started_annotation) {
+ // If we've started an annotation, draw a horizontal line in addition to
+ // any other symbols.
+ if (access) {
+ fprintf(stream_, "─╨");
+ } else if (future) {
+ fprintf(stream_, "─║");
+ } else {
+ fprintf(stream_, "──");
+ }
+ } else {
+ if (access) {
+ started_annotation = true;
+ fprintf(stream_, " ╙");
+ } else if (future) {
+ fprintf(stream_, " ║");
+ } else {
+ fprintf(stream_, " ");
+ }
+ }
+ }
+ VIXL_ASSERT(started_annotation);
+ fprintf(stream_, "─ 0x");
+ int lane_size_in_nibbles = lane_size_in_bytes * 2;
+ // Print the most-significant struct element first.
+ const char* sep = "";
+ for (int i = struct_element_count - 1; i >= 0; i--) {
+ int offset = lane_size_in_bytes * i;
+ uint64_t nibble = Memory::Read(lane_size_in_bytes, address + offset);
+ fprintf(stream_, "%s%0*" PRIx64, sep, lane_size_in_nibbles, nibble);
+ sep = "'";
+ }
fprintf(stream_,
- " <- %s0x%016" PRIxPTR "%s\n",
+ " %s %s0x%016" PRIxPTR "%s\n",
+ op,
clr_memory_address,
address,
clr_normal);
+ return future_access_mask & ~access_mask;
}
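The lane annotation drawn above ('╙' under the byte being accessed, '║' under bytes that later lines will annotate, '─' joining the access to the printed value) can be sketched in isolation. This is an illustrative reconstruction, not part of the patch; the helper name and the std::string return type are assumptions:

#include <cstdint>
#include <string>

// Build the annotation drawn under a register value: '╙' marks the accessed
// byte, '║' marks bytes still to be annotated, '─' joins the accessed byte to
// the value printed on the right.
std::string BuildLaneAnnotation(uint16_t access_mask,
                                uint16_t future_access_mask,
                                int reg_size_in_bytes) {
  std::string out;
  bool started = false;
  for (int lane = reg_size_in_bytes - 1; lane >= 0; lane--) {
    bool access = (access_mask & (1 << lane)) != 0;
    bool future = (future_access_mask & (1 << lane)) != 0;
    if (started) {
      out += access ? "─╨" : (future ? "─║" : "──");
    } else {
      if (access) started = true;
      out += access ? " ╙" : (future ? " ║" : "  ");
    }
  }
  return out + "─";  // The caller appends the struct value after this.
}

For a 16-byte register with access_mask 0x0001 and future_access_mask 0x0100 this produces the "║ ... ╙─" shape seen in the struct-access examples above.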
-
-void Simulator::PrintVRead(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format,
- unsigned lane) {
- vregisters_[reg_code].NotifyRegisterLogged();
-
- // The template is "# v{code}: 0x{rawbits} <- address".
- PrintVRegisterRawHelper(reg_code);
- if (format & kPrintRegAsFP) {
- PrintVRegisterFPHelper(reg_code,
- GetPrintRegLaneSizeInBytes(format),
- GetPrintRegLaneCount(format),
- lane);
+void Simulator::PrintAccess(int code,
+ PrintRegisterFormat format,
+ const char* op,
+ uintptr_t address) {
+ VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
+ VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
+ if ((format & kPrintRegPartial) == 0) {
+ registers_[code].NotifyRegisterLogged();
}
+ // Scalar-format accesses use a simple format:
+ // "# {reg}: 0x{value} -> {address}"
+
+ // Suppress the newline, so the access annotation goes on the same line.
+ PrintRegister(code, format, "");
fprintf(stream_,
- " <- %s0x%016" PRIxPTR "%s\n",
+ " %s %s0x%016" PRIxPTR "%s\n",
+ op,
clr_memory_address,
address,
clr_normal);
}
+void Simulator::PrintVAccess(int code,
+ PrintRegisterFormat format,
+ const char* op,
+ uintptr_t address) {
+ VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
-void Simulator::PrintWrite(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format) {
- VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
+ // Scalar-format accesses use a simple format:
+ // "# v{code}: 0x{value} -> {address}"
- // The template is "# v{code}: 0x{value} -> {address}". To keep the trace tidy
- // and readable, the value is aligned with the values in the register trace.
- PrintRegisterRawHelper(reg_code,
- Reg31IsZeroRegister,
- GetPrintRegSizeInBytes(format));
+ // Suppress the newline, so the access annotation goes on the same line.
+ PrintVRegister(code, format, "");
fprintf(stream_,
- " -> %s0x%016" PRIxPTR "%s\n",
+ " %s %s0x%016" PRIxPTR "%s\n",
+ op,
clr_memory_address,
address,
clr_normal);
}
+void Simulator::PrintVStructAccess(int rt_code,
+ int reg_count,
+ PrintRegisterFormat format,
+ const char* op,
+ uintptr_t address) {
+ VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
+
+ // For example:
+ // "# v{code}: 0x{value}"
+ // "# ...: 0x{value}"
+ // "# ║ ╙─ {struct_value} -> {lowest_address}"
+ // "# ╙───── {struct_value} -> {highest_address}"
+
+ uint16_t lane_mask = GetPrintRegLaneMask(format);
+ PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
+
+ int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
+ int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
+ for (int i = 0; i < reg_size_in_bytes; i += lane_size_in_bytes) {
+ uint16_t access_mask = 1 << i;
+ VIXL_ASSERT((lane_mask & access_mask) != 0);
+ lane_mask = PrintPartialAccess(access_mask,
+ lane_mask,
+ reg_count,
+ lane_size_in_bytes,
+ op,
+ address + (i * reg_count));
+ }
+}
+
+void Simulator::PrintVSingleStructAccess(int rt_code,
+ int reg_count,
+ int lane,
+ PrintRegisterFormat format,
+ const char* op,
+ uintptr_t address) {
+ VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
+
+ // For example:
+ // "# v{code}: 0x{value}"
+ // "# ...: 0x{value}"
+ // "# ╙───── {struct_value} -> {address}"
+
+ int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
+ uint16_t lane_mask = 1 << (lane * lane_size_in_bytes);
+ PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
+ PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
+}
+
+void Simulator::PrintVReplicatingStructAccess(int rt_code,
+ int reg_count,
+ PrintRegisterFormat format,
+ const char* op,
+ uintptr_t address) {
+ VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
+
+ // For example:
+ // "# v{code}: 0x{value}"
+ // "# ...: 0x{value}"
+ // "# ╙─╨─╨─╨─ {struct_value} -> {address}"
+
+ int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
+ uint16_t lane_mask = GetPrintRegLaneMask(format);
+ PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
+ PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
+}
+
+void Simulator::PrintZAccess(int rt_code, const char* op, uintptr_t address) {
+ VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
+
+ // Scalar-format accesses are split into separate chunks, each of which uses a
+ // simple format:
+ // "# z{code}<127:0>: 0x{value} -> {address}"
+ // "# z{code}<255:128>: 0x{value} -> {address + 16}"
+ // "# z{code}<383:256>: 0x{value} -> {address + 32}"
+ // etc
+
+ int vl = GetVectorLengthInBits();
+ VIXL_ASSERT((vl % kQRegSize) == 0);
+ for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
+ // Suppress the newline, so the access annotation goes on the same line.
+ PrintPartialZRegister(rt_code, q_index, kPrintRegVnQPartial, "");
+ fprintf(stream_,
+ " %s %s0x%016" PRIxPTR "%s\n",
+ op,
+ clr_memory_address,
+ address,
+ clr_normal);
+ address += kQRegSizeInBytes;
+ }
+}
+
+void Simulator::PrintZStructAccess(int rt_code,
+ int reg_count,
+ const LogicPRegister& pg,
+ PrintRegisterFormat format,
+ int msize_in_bytes,
+ const char* op,
+ const LogicSVEAddressVector& addr) {
+ VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
+
+ // For example:
+ // "# z{code}<255:128>: 0x{value}"
+ // "# ...<255:128>: 0x{value}"
+ // "# ║ ╙─ {struct_value} -> {first_address}"
+ // "# ╙───── {struct_value} -> {last_address}"
+
+ // We're going to print the register in parts, so force a partial format.
+ bool skip_inactive_chunks = (format & kPrintRegPartial) != 0;
+ format = GetPrintRegPartial(format);
+
+ int esize_in_bytes = GetPrintRegLaneSizeInBytes(format);
+ int vl = GetVectorLengthInBits();
+ VIXL_ASSERT((vl % kQRegSize) == 0);
+ int lanes_per_q = kQRegSizeInBytes / esize_in_bytes;
+ for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
+ uint16_t pred =
+ pg.GetActiveMask<uint16_t>(q_index) & GetPrintRegLaneMask(format);
+ if ((pred == 0) && skip_inactive_chunks) continue;
+
+ PrintZRegistersForStructuredAccess(rt_code,
+ q_index,
+ reg_count,
+ pred,
+ format);
+ if (pred == 0) {
+ // This register chunk has no active lanes. The loop below would print
+ // nothing, so leave a blank line to keep structures grouped together.
+ fprintf(stream_, "#\n");
+ continue;
+ }
+ for (int i = 0; i < lanes_per_q; i++) {
+ uint16_t access = 1 << (i * esize_in_bytes);
+ int lane = (q_index * lanes_per_q) + i;
+ // Skip inactive lanes.
+ if ((pred & access) == 0) continue;
+ pred = PrintPartialAccess(access,
+ pred,
+ reg_count,
+ msize_in_bytes,
+ op,
+ addr.GetStructAddress(lane));
+ }
+ }
-void Simulator::PrintVWrite(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format,
- unsigned lane) {
- // The templates:
- // "# v{code}: 0x{rawbits} -> {address}"
- // "# v{code}: 0x{rawbits} (..., {value}, ...) -> {address}".
- // "# v{code}: 0x{rawbits} ({reg}:{value}) -> {address}"
- // Because this trace doesn't represent a change to the source register's
- // value, only the relevant part of the value is printed. To keep the trace
- // tidy and readable, the raw value is aligned with the other values in the
- // register trace.
- int lane_count = GetPrintRegLaneCount(format);
- int lane_size = GetPrintRegLaneSizeInBytes(format);
- int reg_size = GetPrintRegSizeInBytes(format);
- PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane);
- if (format & kPrintRegAsFP) {
- PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane);
+ // We print the whole register, even for stores.
+ for (int i = 0; i < reg_count; i++) {
+ vregisters_[(rt_code + i) % kNumberOfZRegisters].NotifyRegisterLogged();
+ }
+}
+
+void Simulator::PrintPAccess(int code, const char* op, uintptr_t address) {
+ VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
+
+ // Scalar-format accesses are split into separate chunks, each of which uses a
+ // simple format:
+ // "# p{code}<15:0>: 0b{value} -> {address}"
+ // "# p{code}<31:16>: 0b{value} -> {address + 2}"
+ // "# p{code}<47:32>: 0b{value} -> {address + 4}"
+ // etc
+
+ int vl = GetVectorLengthInBits();
+ VIXL_ASSERT((vl % kQRegSize) == 0);
+ for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
+ // Suppress the newline, so the access annotation goes on the same line.
+ PrintPartialPRegister(code, q_index, kPrintRegVnQPartial, "");
+ fprintf(stream_,
+ " %s %s0x%016" PRIxPTR "%s\n",
+ op,
+ clr_memory_address,
+ address,
+ clr_normal);
+ address += kQRegSizeInBytes;
}
- fprintf(stream_,
- " -> %s0x%016" PRIxPTR "%s\n",
- clr_memory_address,
- address,
- clr_normal);
}
+void Simulator::PrintRead(int rt_code,
+ PrintRegisterFormat format,
+ uintptr_t address) {
+ VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
+ registers_[rt_code].NotifyRegisterLogged();
+ PrintAccess(rt_code, format, "<-", address);
+}
+
+void Simulator::PrintExtendingRead(int rt_code,
+ PrintRegisterFormat format,
+ int access_size_in_bytes,
+ uintptr_t address) {
+ int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
+ if (access_size_in_bytes == reg_size_in_bytes) {
+ // There is no extension here, so print a simple load.
+ PrintRead(rt_code, format, address);
+ return;
+ }
+ VIXL_ASSERT(access_size_in_bytes < reg_size_in_bytes);
+
+ // For sign- and zero-extension, make it clear that the resulting register
+ // value is different from what is loaded from memory.
+ VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
+ registers_[rt_code].NotifyRegisterLogged();
+ PrintRegister(rt_code, format);
+ PrintPartialAccess(1,
+ 0,
+ 1,
+ access_size_in_bytes,
+ "<-",
+ address,
+ kXRegSizeInBytes);
+}
+
+void Simulator::PrintVRead(int rt_code,
+ PrintRegisterFormat format,
+ uintptr_t address) {
+ VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
+ vregisters_[rt_code].NotifyRegisterLogged();
+ PrintVAccess(rt_code, format, "<-", address);
+}
+
+void Simulator::PrintWrite(int rt_code,
+ PrintRegisterFormat format,
+ uintptr_t address) {
+ // Because this trace doesn't represent a change to the source register's
+ // value, only print the relevant part of the value.
+ format = GetPrintRegPartial(format);
+ VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
+ registers_[rt_code].NotifyRegisterLogged();
+ PrintAccess(rt_code, format, "->", address);
+}
+
+void Simulator::PrintVWrite(int rt_code,
+ PrintRegisterFormat format,
+ uintptr_t address) {
+ // Because this trace doesn't represent a change to the source register's
+ // value, only print the relevant part of the value.
+ format = GetPrintRegPartial(format);
+ // It only makes sense to write scalar values here. Vectors are handled by
+ // PrintVStructAccess.
+ VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
+ PrintVAccess(rt_code, format, "->", address);
+}
void Simulator::PrintTakenBranch(const Instruction* target) {
fprintf(stream_,
@@ -1047,7 +1575,6 @@ void Simulator::PrintTakenBranch(const Instruction* target) {
reinterpret_cast<uint64_t>(target));
}
-
// Visitors---------------------------------------------------------------------
@@ -1289,7 +1816,7 @@ void Simulator::VisitAddSubShifted(const Instruction* instr) {
void Simulator::VisitAddSubImmediate(const Instruction* instr) {
int64_t op2 = instr->GetImmAddSub()
- << ((instr->GetShiftAddSub() == 1) ? 12 : 0);
+ << ((instr->GetImmAddSubShift() == 1) ? 12 : 0);
AddSubHelper(instr, op2);
}
@@ -1489,7 +2016,7 @@ void Simulator::LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr) {
// Approximate load-acquire by issuing a full barrier after the load.
__sync_synchronize();
- LogRead(address, rt, GetPrintRegisterFormat(element_size));
+ LogRead(rt, GetPrintRegisterFormat(element_size), address);
}
@@ -1516,7 +2043,7 @@ void Simulator::StoreReleaseUnscaledOffsetHelper(const Instruction* instr) {
Memory::Write<T>(address, ReadRegister<T>(rt));
- LogWrite(address, rt, GetPrintRegisterFormat(element_size));
+ LogWrite(rt, GetPrintRegisterFormat(element_size), address);
}
@@ -1603,7 +2130,7 @@ void Simulator::VisitLoadStorePAC(const Instruction* instr) {
WriteXRegister(dst, Memory::Read<uint64_t>(addr_ptr), NoRegLog);
unsigned access_size = 1 << 3;
- LogRead(addr_ptr, dst, GetPrintRegisterFormatForSize(access_size));
+ LogRead(dst, GetPrintRegisterFormatForSize(access_size), addr_ptr);
}
@@ -1624,49 +2151,65 @@ void Simulator::LoadStoreHelper(const Instruction* instr,
unsigned srcdst = instr->GetRt();
uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode);
+ bool rt_is_vreg = false;
+ int extend_to_size = 0;
LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
switch (op) {
case LDRB_w:
WriteWRegister(srcdst, Memory::Read<uint8_t>(address), NoRegLog);
+ extend_to_size = kWRegSizeInBytes;
break;
case LDRH_w:
WriteWRegister(srcdst, Memory::Read<uint16_t>(address), NoRegLog);
+ extend_to_size = kWRegSizeInBytes;
break;
case LDR_w:
WriteWRegister(srcdst, Memory::Read<uint32_t>(address), NoRegLog);
+ extend_to_size = kWRegSizeInBytes;
break;
case LDR_x:
WriteXRegister(srcdst, Memory::Read<uint64_t>(address), NoRegLog);
+ extend_to_size = kXRegSizeInBytes;
break;
case LDRSB_w:
WriteWRegister(srcdst, Memory::Read<int8_t>(address), NoRegLog);
+ extend_to_size = kWRegSizeInBytes;
break;
case LDRSH_w:
WriteWRegister(srcdst, Memory::Read<int16_t>(address), NoRegLog);
+ extend_to_size = kWRegSizeInBytes;
break;
case LDRSB_x:
WriteXRegister(srcdst, Memory::Read<int8_t>(address), NoRegLog);
+ extend_to_size = kXRegSizeInBytes;
break;
case LDRSH_x:
WriteXRegister(srcdst, Memory::Read<int16_t>(address), NoRegLog);
+ extend_to_size = kXRegSizeInBytes;
break;
case LDRSW_x:
WriteXRegister(srcdst, Memory::Read<int32_t>(address), NoRegLog);
+ extend_to_size = kXRegSizeInBytes;
break;
case LDR_b:
WriteBRegister(srcdst, Memory::Read<uint8_t>(address), NoRegLog);
+ rt_is_vreg = true;
break;
case LDR_h:
WriteHRegister(srcdst, Memory::Read<uint16_t>(address), NoRegLog);
+ rt_is_vreg = true;
break;
case LDR_s:
WriteSRegister(srcdst, Memory::Read<float>(address), NoRegLog);
+ rt_is_vreg = true;
break;
case LDR_d:
WriteDRegister(srcdst, Memory::Read<double>(address), NoRegLog);
+ rt_is_vreg = true;
break;
case LDR_q:
WriteQRegister(srcdst, Memory::Read<qreg_t>(address), NoRegLog);
+ rt_is_vreg = true;
break;
case STRB_w:
@@ -1683,18 +2226,23 @@ void Simulator::LoadStoreHelper(const Instruction* instr,
break;
case STR_b:
Memory::Write<uint8_t>(address, ReadBRegister(srcdst));
+ rt_is_vreg = true;
break;
case STR_h:
Memory::Write<uint16_t>(address, ReadHRegisterBits(srcdst));
+ rt_is_vreg = true;
break;
case STR_s:
Memory::Write<float>(address, ReadSRegister(srcdst));
+ rt_is_vreg = true;
break;
case STR_d:
Memory::Write<double>(address, ReadDRegister(srcdst));
+ rt_is_vreg = true;
break;
case STR_q:
Memory::Write<qreg_t>(address, ReadQRegister(srcdst));
+ rt_is_vreg = true;
break;
// Ignore prfm hint instructions.
@@ -1705,22 +2253,25 @@ void Simulator::LoadStoreHelper(const Instruction* instr,
VIXL_UNIMPLEMENTED();
}
+ // Print a detailed trace (including the memory address).
+ bool extend = (extend_to_size != 0);
unsigned access_size = 1 << instr->GetSizeLS();
+ unsigned result_size = extend ? extend_to_size : access_size;
+ PrintRegisterFormat print_format =
+ rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
+ : GetPrintRegisterFormatForSize(result_size);
+
if (instr->IsLoad()) {
- if ((op == LDR_s) || (op == LDR_d)) {
- LogVRead(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
- } else if ((op == LDR_b) || (op == LDR_h) || (op == LDR_q)) {
- LogVRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ if (rt_is_vreg) {
+ LogVRead(srcdst, print_format, address);
} else {
- LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ LogExtendingRead(srcdst, print_format, access_size, address);
}
} else if (instr->IsStore()) {
- if ((op == STR_s) || (op == STR_d)) {
- LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
- } else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) {
- LogVWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ if (rt_is_vreg) {
+ LogVWrite(srcdst, print_format, address);
} else {
- LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
+ LogWrite(srcdst, GetPrintRegisterFormatForSize(result_size), address);
}
} else {
VIXL_ASSERT(op == PRFM);
@@ -1765,6 +2316,8 @@ void Simulator::LoadStorePairHelper(const Instruction* instr,
// 'rt' and 'rt2' can only be aliased for stores.
VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2));
+ bool rt_is_vreg = false;
+ bool sign_extend = false;
switch (op) {
// Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
// will print a more detailed log.
@@ -1776,6 +2329,7 @@ void Simulator::LoadStorePairHelper(const Instruction* instr,
case LDP_s: {
WriteSRegister(rt, Memory::Read<float>(address), NoRegLog);
WriteSRegister(rt2, Memory::Read<float>(address2), NoRegLog);
+ rt_is_vreg = true;
break;
}
case LDP_x: {
@@ -1786,16 +2340,19 @@ void Simulator::LoadStorePairHelper(const Instruction* instr,
case LDP_d: {
WriteDRegister(rt, Memory::Read<double>(address), NoRegLog);
WriteDRegister(rt2, Memory::Read<double>(address2), NoRegLog);
+ rt_is_vreg = true;
break;
}
case LDP_q: {
WriteQRegister(rt, Memory::Read<qreg_t>(address), NoRegLog);
WriteQRegister(rt2, Memory::Read<qreg_t>(address2), NoRegLog);
+ rt_is_vreg = true;
break;
}
case LDPSW_x: {
WriteXRegister(rt, Memory::Read<int32_t>(address), NoRegLog);
WriteXRegister(rt2, Memory::Read<int32_t>(address2), NoRegLog);
+ sign_extend = true;
break;
}
case STP_w: {
@@ -1806,6 +2363,7 @@ void Simulator::LoadStorePairHelper(const Instruction* instr,
case STP_s: {
Memory::Write<float>(address, ReadSRegister(rt));
Memory::Write<float>(address2, ReadSRegister(rt2));
+ rt_is_vreg = true;
break;
}
case STP_x: {
@@ -1816,40 +2374,43 @@ void Simulator::LoadStorePairHelper(const Instruction* instr,
case STP_d: {
Memory::Write<double>(address, ReadDRegister(rt));
Memory::Write<double>(address2, ReadDRegister(rt2));
+ rt_is_vreg = true;
break;
}
case STP_q: {
Memory::Write<qreg_t>(address, ReadQRegister(rt));
Memory::Write<qreg_t>(address2, ReadQRegister(rt2));
+ rt_is_vreg = true;
break;
}
default:
VIXL_UNREACHABLE();
}
- // Print a detailed trace (including the memory address) instead of the basic
- // register:value trace generated by set_*reg().
+ // Print a detailed trace (including the memory address).
+ unsigned result_size = sign_extend ? kXRegSizeInBytes : element_size;
+ PrintRegisterFormat print_format =
+ rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
+ : GetPrintRegisterFormatForSize(result_size);
+
if (instr->IsLoad()) {
- if ((op == LDP_s) || (op == LDP_d)) {
- LogVRead(address, rt, GetPrintRegisterFormatForSizeFP(element_size));
- LogVRead(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size));
- } else if (op == LDP_q) {
- LogVRead(address, rt, GetPrintRegisterFormatForSize(element_size));
- LogVRead(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ if (rt_is_vreg) {
+ LogVRead(rt, print_format, address);
+ LogVRead(rt2, print_format, address2);
+ } else if (sign_extend) {
+ LogExtendingRead(rt, print_format, element_size, address);
+ LogExtendingRead(rt2, print_format, element_size, address2);
} else {
- LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
- LogRead(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ LogRead(rt, print_format, address);
+ LogRead(rt2, print_format, address2);
}
} else {
- if ((op == STP_s) || (op == STP_d)) {
- LogVWrite(address, rt, GetPrintRegisterFormatForSizeFP(element_size));
- LogVWrite(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size));
- } else if (op == STP_q) {
- LogVWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
- LogVWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ if (rt_is_vreg) {
+ LogVWrite(rt, print_format, address);
+ LogVWrite(rt2, print_format, address2);
} else {
- LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
- LogWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size));
+ LogWrite(rt, print_format, address);
+ LogWrite(rt2, print_format, address2);
}
}
@@ -1890,10 +2451,10 @@ void Simulator::CompareAndSwapHelper(const Instruction* instr) {
__sync_synchronize();
}
Memory::Write<T>(address, newvalue);
- LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogWrite(rt, GetPrintRegisterFormatForSize(element_size), address);
}
- WriteRegister<T>(rs, data);
- LogRead(address, rs, GetPrintRegisterFormatForSize(element_size));
+ WriteRegister<T>(rs, data, NoRegLog);
+ LogRead(rs, GetPrintRegisterFormatForSize(element_size), address);
}
@@ -1904,7 +2465,7 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
unsigned rt = instr->GetRt();
unsigned rn = instr->GetRn();
- VIXL_ASSERT((rs % 2 == 0) && (rs % 2 == 0));
+ VIXL_ASSERT((rs % 2 == 0) && (rt % 2 == 0));
unsigned element_size = sizeof(T);
uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
@@ -1925,8 +2486,8 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
// associated with that location, even if the compare subsequently fails.
local_monitor_.Clear();
- T data_high = Memory::Read<T>(address);
- T data_low = Memory::Read<T>(address2);
+ T data_low = Memory::Read<T>(address);
+ T data_high = Memory::Read<T>(address2);
if (is_acquire) {
// Approximate load-acquire by issuing a full barrier after the load.
@@ -1941,22 +2502,82 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
__sync_synchronize();
}
- Memory::Write<T>(address, newvalue_high);
- Memory::Write<T>(address2, newvalue_low);
+ Memory::Write<T>(address, newvalue_low);
+ Memory::Write<T>(address2, newvalue_high);
}
- WriteRegister<T>(rs + 1, data_high);
- WriteRegister<T>(rs, data_low);
+ WriteRegister<T>(rs + 1, data_high, NoRegLog);
+ WriteRegister<T>(rs, data_low, NoRegLog);
- LogRead(address, rs + 1, GetPrintRegisterFormatForSize(element_size));
- LogRead(address2, rs, GetPrintRegisterFormatForSize(element_size));
+ PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
+ LogRead(rs, format, address);
+ LogRead(rs + 1, format, address2);
if (same) {
- LogWrite(address, rt + 1, GetPrintRegisterFormatForSize(element_size));
- LogWrite(address2, rt, GetPrintRegisterFormatForSize(element_size));
+ LogWrite(rt, format, address);
+ LogWrite(rt + 1, format, address2);
+ }
+}
+
+bool Simulator::CanReadMemory(uintptr_t address, size_t size) {
+ // To simulate fault-tolerant loads, we need to know what host addresses we
+ // can access without generating a real fault. One way to do that is to
+ // attempt to `write()` the memory to a dummy pipe[1]. This is more portable
+ // and less intrusive than using (global) signal handlers.
+ //
+ // [1]: https://stackoverflow.com/questions/7134590
+
+ size_t written = 0;
+ bool can_read = true;
+ // `write` will normally return after one invocation, but it is allowed to
+ // handle only part of the operation, so wrap it in a loop.
+ while (can_read && (written < size)) {
+ ssize_t result = write(dummy_pipe_fd_[1],
+ reinterpret_cast<void*>(address + written),
+ size - written);
+ if (result > 0) {
+ written += result;
+ } else {
+ switch (result) {
+ case -EPERM:
+ case -EFAULT:
+ // The address range is not accessible.
+ // `write` is supposed to return -EFAULT in this case, but in practice
+ // it seems to return -EPERM, so we accept that too.
+ can_read = false;
+ break;
+ case -EINTR:
+ // The call was interrupted by a signal. Just try again.
+ break;
+ default:
+ // Any other error is fatal.
+ VIXL_ABORT();
+ }
+ }
+ }
+ // Drain the read side of the pipe. If we don't do this, we'll leak memory as
+ // the dummy data is buffered. As before, we expect to drain the whole write
+ // in one invocation, but cannot guarantee that, so we wrap it in a loop. This
+ // function is primarily intended to implement SVE fault-tolerant loads, so
+ // the maximum Z register size is a good default buffer size.
+ char buffer[kZRegMaxSizeInBytes];
+ while (written > 0) {
+ ssize_t result = read(dummy_pipe_fd_[0],
+ reinterpret_cast<void*>(buffer),
+ sizeof(buffer));
+ // `read` blocks, and returns 0 only at EOF. We should not hit EOF until
+ // we've read everything that was written, so treat 0 as an error.
+ if (result > 0) {
+ VIXL_ASSERT(static_cast<size_t>(result) <= written);
+ written -= result;
+ } else {
+ // For -EINTR, just try again. We can't handle any other error.
+ VIXL_CHECK(result == -EINTR);
+ }
}
-}
+ return can_read;
+}
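As a standalone illustration of the pipe-probe technique used by CanReadMemory, the sketch below checks whether a buffer is readable by write()-ing it to a throwaway pipe. It is not the simulator's implementation: it follows the usual POSIX convention of testing errno after a -1 return, drains the pipe inside the loop, and the names CanRead and fds are assumptions.

#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <unistd.h>

// Probe whether [address, address + size) is readable without installing a
// signal handler: write() fails with EFAULT if the source buffer is
// inaccessible.
bool CanRead(uintptr_t address, size_t size) {
  int fds[2];
  if (pipe(fds) != 0) return false;
  bool readable = true;
  size_t written = 0;
  while (readable && (written < size)) {
    ssize_t rc = write(fds[1],
                       reinterpret_cast<const void*>(address + written),
                       size - written);
    if (rc > 0) {
      written += rc;
      // Drain what was just written so the pipe buffer cannot fill and block.
      char buffer[4096];
      while (rc > 0) {
        ssize_t drained = read(fds[0], buffer, sizeof(buffer));
        if (drained <= 0) break;
        rc -= drained;
      }
    } else if ((rc < 0) && ((errno == EFAULT) || (errno == EPERM))) {
      readable = false;  // The source range is not accessible.
    } else if ((rc < 0) && (errno == EINTR)) {
      continue;  // Interrupted by a signal; just retry.
    } else {
      break;  // Unexpected result; give up rather than spin.
    }
  }
  close(fds[0]);
  close(fds[1]);
  return readable;
}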
void Simulator::PrintExclusiveAccessWarning() {
if (print_exclusive_access_warning_) {
@@ -1971,7 +2592,6 @@ void Simulator::PrintExclusiveAccessWarning() {
}
}
-
void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
LoadStoreExclusive op =
static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask));
@@ -2045,30 +2665,35 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
// Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS).
// We will print a more detailed log.
+ unsigned reg_size = 0;
switch (op) {
case LDXRB_w:
case LDAXRB_w:
case LDARB_w:
case LDLARB:
WriteWRegister(rt, Memory::Read<uint8_t>(address), NoRegLog);
+ reg_size = kWRegSizeInBytes;
break;
case LDXRH_w:
case LDAXRH_w:
case LDARH_w:
case LDLARH:
WriteWRegister(rt, Memory::Read<uint16_t>(address), NoRegLog);
+ reg_size = kWRegSizeInBytes;
break;
case LDXR_w:
case LDAXR_w:
case LDAR_w:
case LDLAR_w:
WriteWRegister(rt, Memory::Read<uint32_t>(address), NoRegLog);
+ reg_size = kWRegSizeInBytes;
break;
case LDXR_x:
case LDAXR_x:
case LDAR_x:
case LDLAR_x:
WriteXRegister(rt, Memory::Read<uint64_t>(address), NoRegLog);
+ reg_size = kXRegSizeInBytes;
break;
case LDXP_w:
case LDAXP_w:
@@ -2076,6 +2701,7 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
WriteWRegister(rt2,
Memory::Read<uint32_t>(address + element_size),
NoRegLog);
+ reg_size = kWRegSizeInBytes;
break;
case LDXP_x:
case LDAXP_x:
@@ -2083,6 +2709,7 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
WriteXRegister(rt2,
Memory::Read<uint64_t>(address + element_size),
NoRegLog);
+ reg_size = kXRegSizeInBytes;
break;
default:
VIXL_UNREACHABLE();
@@ -2093,11 +2720,10 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
__sync_synchronize();
}
- LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+ PrintRegisterFormat format = GetPrintRegisterFormatForSize(reg_size);
+ LogExtendingRead(rt, format, element_size, address);
if (is_pair) {
- LogRead(address + element_size,
- rt2,
- GetPrintRegisterFormatForSize(element_size));
+ LogExtendingRead(rt2, format, element_size, address + element_size);
}
} else {
if (is_acquire_release) {
@@ -2161,11 +2787,11 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
VIXL_UNREACHABLE();
}
- LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
+ PrintRegisterFormat format =
+ GetPrintRegisterFormatForSize(element_size);
+ LogWrite(rt, format, address);
if (is_pair) {
- LogWrite(address + element_size,
- rt2,
- GetPrintRegisterFormatForSize(element_size));
+ LogWrite(rt2, format, address + element_size);
}
}
}
@@ -2232,8 +2858,9 @@ void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
Memory::Write<T>(address, result);
WriteRegister<T>(rt, data, NoRegLog);
- LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
- LogWrite(address, rs, GetPrintRegisterFormatForSize(element_size));
+ PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
+ LogRead(rt, format, address);
+ LogWrite(rs, format, address);
}
template <typename T>
@@ -2264,8 +2891,9 @@ void Simulator::AtomicMemorySwapHelper(const Instruction* instr) {
WriteRegister<T>(rt, data);
- LogRead(address, rt, GetPrintRegisterFormat(element_size));
- LogWrite(address, rs, GetPrintRegisterFormat(element_size));
+ PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
+ LogRead(rt, format, address);
+ LogWrite(rs, format, address);
}
template <typename T>
@@ -2283,7 +2911,7 @@ void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) {
// Approximate load-acquire by issuing a full barrier after the load.
__sync_synchronize();
- LogRead(address, rt, GetPrintRegisterFormat(element_size));
+ LogRead(rt, GetPrintRegisterFormatForSize(element_size), address);
}
#define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \
@@ -2400,27 +3028,27 @@ void Simulator::VisitLoadLiteral(const Instruction* instr) {
// print a more detailed log.
case LDR_w_lit:
WriteWRegister(rt, Memory::Read<uint32_t>(address), NoRegLog);
- LogRead(address, rt, kPrintWReg);
+ LogRead(rt, kPrintWReg, address);
break;
case LDR_x_lit:
WriteXRegister(rt, Memory::Read<uint64_t>(address), NoRegLog);
- LogRead(address, rt, kPrintXReg);
+ LogRead(rt, kPrintXReg, address);
break;
case LDR_s_lit:
WriteSRegister(rt, Memory::Read<float>(address), NoRegLog);
- LogVRead(address, rt, kPrintSReg);
+ LogVRead(rt, kPrintSRegFP, address);
break;
case LDR_d_lit:
WriteDRegister(rt, Memory::Read<double>(address), NoRegLog);
- LogVRead(address, rt, kPrintDReg);
+ LogVRead(rt, kPrintDRegFP, address);
break;
case LDR_q_lit:
WriteQRegister(rt, Memory::Read<qreg_t>(address), NoRegLog);
- LogVRead(address, rt, kPrintReg1Q);
+ LogVRead(rt, kPrintReg1Q, address);
break;
case LDRSW_x_lit:
WriteXRegister(rt, Memory::Read<int32_t>(address), NoRegLog);
- LogRead(address, rt, kPrintWReg);
+ LogExtendingRead(rt, kPrintXReg, kWRegSizeInBytes, address);
break;
// Ignore prfm hint instructions.
@@ -2795,40 +3423,6 @@ void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
}
-// The algorithm used is adapted from the one described in section 8.2 of
-// Hacker's Delight, by Henry S. Warren, Jr.
-template <typename T>
-static int64_t MultiplyHigh(T u, T v) {
- uint64_t u0, v0, w0, u1, v1, w1, w2, t;
- uint64_t sign_mask = UINT64_C(0x8000000000000000);
- uint64_t sign_ext = 0;
- if (std::numeric_limits<T>::is_signed) {
- sign_ext = UINT64_C(0xffffffff00000000);
- }
-
- VIXL_ASSERT(sizeof(u) == sizeof(uint64_t));
- VIXL_ASSERT(sizeof(u) == sizeof(u0));
-
- u0 = u & 0xffffffff;
- u1 = u >> 32 | (((u & sign_mask) != 0) ? sign_ext : 0);
- v0 = v & 0xffffffff;
- v1 = v >> 32 | (((v & sign_mask) != 0) ? sign_ext : 0);
-
- w0 = u0 * v0;
- t = u1 * v0 + (w0 >> 32);
-
- w1 = t & 0xffffffff;
- w2 = t >> 32 | (((t & sign_mask) != 0) ? sign_ext : 0);
- w1 = u0 * v1 + w1;
- w1 = w1 >> 32 | (((w1 & sign_mask) != 0) ? sign_ext : 0);
-
- uint64_t value = u1 * v1 + w2 + w1;
- int64_t result;
- memcpy(&result, &value, sizeof(result));
- return result;
-}
-
-
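The local helper removed here is superseded by the shared internal::MultiplyHigh<64> template used below. For comparison only, on compilers that provide the GCC/Clang __int128 extension the same high-half products can be computed without the Hacker's Delight decomposition; this sketch is illustrative and is not how VIXL implements it:

#include <cstdint>

// High 64 bits of a 64x64-bit multiplication, via a 128-bit intermediate.
uint64_t UnsignedMultiplyHigh(uint64_t u, uint64_t v) {
  return static_cast<uint64_t>((static_cast<unsigned __int128>(u) * v) >> 64);
}

int64_t SignedMultiplyHigh(int64_t u, int64_t v) {
  return static_cast<int64_t>((static_cast<__int128>(u) * v) >> 64);
}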
void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
@@ -2864,12 +3458,13 @@ void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
result = ReadXRegister(instr->GetRa()) - (rn_u32 * rm_u32);
break;
case UMULH_x:
- result = MultiplyHigh(ReadRegister<uint64_t>(instr->GetRn()),
- ReadRegister<uint64_t>(instr->GetRm()));
+ result =
+ internal::MultiplyHigh<64>(ReadRegister<uint64_t>(instr->GetRn()),
+ ReadRegister<uint64_t>(instr->GetRm()));
break;
case SMULH_x:
- result = MultiplyHigh(ReadXRegister(instr->GetRn()),
- ReadXRegister(instr->GetRm()));
+ result = internal::MultiplyHigh<64>(ReadXRegister(instr->GetRn()),
+ ReadXRegister(instr->GetRm()));
break;
default:
VIXL_UNIMPLEMENTED();
@@ -2936,9 +3531,10 @@ void Simulator::VisitExtract(const Instruction* instr) {
unsigned reg_size = (instr->GetSixtyFourBits() == 1) ? kXRegSize : kWRegSize;
uint64_t low_res =
static_cast<uint64_t>(ReadRegister(reg_size, instr->GetRm())) >> lsb;
- uint64_t high_res =
- (lsb == 0) ? 0 : ReadRegister<uint64_t>(reg_size, instr->GetRn())
- << (reg_size - lsb);
+ uint64_t high_res = (lsb == 0)
+ ? 0
+ : ReadRegister<uint64_t>(reg_size, instr->GetRn())
+ << (reg_size - lsb);
WriteRegister(reg_size, instr->GetRd(), low_res | high_res);
}
@@ -3948,8 +4544,8 @@ void Simulator::VisitSystem(const Instruction* instr) {
break;
case RNDR:
case RNDRRS: {
- uint64_t high = jrand48(rndr_state_);
- uint64_t low = jrand48(rndr_state_);
+ uint64_t high = jrand48(rand_state_);
+ uint64_t low = jrand48(rand_state_);
uint64_t rand_num = (high << 32) | (low & 0xffffffff);
WriteXRegister(instr->GetRt(), rand_num);
// Simulate successful random number generation.
@@ -4530,10 +5126,10 @@ void Simulator::VisitNEON3Same(const Instruction* instr) {
fminnm(vf, rd, rn, rm);
break;
case NEON_FMLA:
- fmla(vf, rd, rn, rm);
+ fmla(vf, rd, rd, rn, rm);
break;
case NEON_FMLS:
- fmls(vf, rd, rn, rm);
+ fmls(vf, rd, rd, rn, rm);
break;
case NEON_FMULX:
fmulx(vf, rd, rn, rm);
@@ -4624,10 +5220,10 @@ void Simulator::VisitNEON3Same(const Instruction* instr) {
cmptst(vf, rd, rn, rm);
break;
case NEON_MLS:
- mls(vf, rd, rn, rm);
+ mls(vf, rd, rd, rn, rm);
break;
case NEON_MLA:
- mla(vf, rd, rn, rm);
+ mla(vf, rd, rd, rn, rm);
break;
case NEON_MUL:
mul(vf, rd, rn, rm);
@@ -4754,13 +5350,11 @@ void Simulator::VisitNEON3SameFP16(const Instruction* instr) {
B(vf, rd, rn, rm); \
break;
SIM_FUNC(FMAXNM, fmaxnm);
- SIM_FUNC(FMLA, fmla);
SIM_FUNC(FADD, fadd);
SIM_FUNC(FMULX, fmulx);
SIM_FUNC(FMAX, fmax);
SIM_FUNC(FRECPS, frecps);
SIM_FUNC(FMINNM, fminnm);
- SIM_FUNC(FMLS, fmls);
SIM_FUNC(FSUB, fsub);
SIM_FUNC(FMIN, fmin);
SIM_FUNC(FRSQRTS, frsqrts);
@@ -4773,6 +5367,12 @@ void Simulator::VisitNEON3SameFP16(const Instruction* instr) {
SIM_FUNC(FABD, fabd);
SIM_FUNC(FMINP, fminp);
#undef SIM_FUNC
+ case NEON_FMLA_H:
+ fmla(vf, rd, rd, rn, rm);
+ break;
+ case NEON_FMLS_H:
+ fmls(vf, rd, rd, rn, rm);
+ break;
case NEON_FCMEQ_H:
fcmp(vf, rd, rn, rm, eq);
break;
@@ -4803,7 +5403,7 @@ void Simulator::VisitNEON3SameExtra(const Instruction* instr) {
VectorFormat vf = nfd.GetVectorFormat();
if (instr->Mask(NEON3SameExtraFCMLAMask) == NEON_FCMLA) {
rot = instr->GetImmRotFcmlaVec();
- fcmla(vf, rd, rn, rm, rot);
+ fcmla(vf, rd, rn, rm, rd, rot);
} else if (instr->Mask(NEON3SameExtraFCADDMask) == NEON_FCADD) {
rot = instr->GetImmRotFcadd();
fcadd(vf, rd, rn, rm, rot);
@@ -5347,7 +5947,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
reg[i] = (instr->GetRt() + i) % kNumberOfVRegisters;
addr[i] = addr_base + (i * reg_size);
}
- int count = 1;
+ int struct_parts = 1;
+ int reg_count = 1;
bool log_read = true;
// Bit 23 determines whether this is an offset or post-index addressing mode.
@@ -5363,17 +5964,17 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
case NEON_LD1_4v:
case NEON_LD1_4v_post:
ld1(vf, ReadVRegister(reg[3]), addr[3]);
- count++;
+ reg_count++;
VIXL_FALLTHROUGH();
case NEON_LD1_3v:
case NEON_LD1_3v_post:
ld1(vf, ReadVRegister(reg[2]), addr[2]);
- count++;
+ reg_count++;
VIXL_FALLTHROUGH();
case NEON_LD1_2v:
case NEON_LD1_2v_post:
ld1(vf, ReadVRegister(reg[1]), addr[1]);
- count++;
+ reg_count++;
VIXL_FALLTHROUGH();
case NEON_LD1_1v:
case NEON_LD1_1v_post:
@@ -5382,17 +5983,17 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
case NEON_ST1_4v:
case NEON_ST1_4v_post:
st1(vf, ReadVRegister(reg[3]), addr[3]);
- count++;
+ reg_count++;
VIXL_FALLTHROUGH();
case NEON_ST1_3v:
case NEON_ST1_3v_post:
st1(vf, ReadVRegister(reg[2]), addr[2]);
- count++;
+ reg_count++;
VIXL_FALLTHROUGH();
case NEON_ST1_2v:
case NEON_ST1_2v_post:
st1(vf, ReadVRegister(reg[1]), addr[1]);
- count++;
+ reg_count++;
VIXL_FALLTHROUGH();
case NEON_ST1_1v:
case NEON_ST1_1v_post:
@@ -5402,12 +6003,14 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
case NEON_LD2_post:
case NEON_LD2:
ld2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
- count = 2;
+ struct_parts = 2;
+ reg_count = 2;
break;
case NEON_ST2:
case NEON_ST2_post:
st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
- count = 2;
+ struct_parts = 2;
+ reg_count = 2;
log_read = false;
break;
case NEON_LD3_post:
@@ -5417,7 +6020,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
ReadVRegister(reg[1]),
ReadVRegister(reg[2]),
addr[0]);
- count = 3;
+ struct_parts = 3;
+ reg_count = 3;
break;
case NEON_ST3:
case NEON_ST3_post:
@@ -5426,7 +6030,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
ReadVRegister(reg[1]),
ReadVRegister(reg[2]),
addr[0]);
- count = 3;
+ struct_parts = 3;
+ reg_count = 3;
log_read = false;
break;
case NEON_ST4:
@@ -5437,7 +6042,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
ReadVRegister(reg[2]),
ReadVRegister(reg[3]),
addr[0]);
- count = 4;
+ struct_parts = 4;
+ reg_count = 4;
log_read = false;
break;
case NEON_LD4_post:
@@ -5448,22 +6054,31 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
ReadVRegister(reg[2]),
ReadVRegister(reg[3]),
addr[0]);
- count = 4;
+ struct_parts = 4;
+ reg_count = 4;
break;
default:
VIXL_UNIMPLEMENTED();
}
- // Explicitly log the register update whilst we have type information.
- for (int i = 0; i < count; i++) {
- // For de-interleaving loads, only print the base address.
- int lane_size = LaneSizeInBytesFromFormat(vf);
- PrintRegisterFormat format = GetPrintRegisterFormatTryFP(
- GetPrintRegisterFormatForSize(reg_size, lane_size));
+ bool do_trace = log_read ? ShouldTraceVRegs() : ShouldTraceWrites();
+ if (do_trace) {
+ PrintRegisterFormat print_format =
+ GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
+ const char* op;
if (log_read) {
- LogVRead(addr_base, reg[i], format);
+ op = "<-";
} else {
- LogVWrite(addr_base, reg[i], format);
+ op = "->";
+ // Stores don't represent a change to the source register's value, so only
+ // print the relevant part of the value.
+ print_format = GetPrintRegPartial(print_format);
+ }
+
+ VIXL_ASSERT((struct_parts == reg_count) || (struct_parts == 1));
+ for (int s = reg_count - struct_parts; s >= 0; s -= struct_parts) {
+ uintptr_t address = addr_base + (s * RegisterSizeInBytesFromFormat(vf));
+ PrintVStructAccess(reg[s], struct_parts, print_format, op, address);
}
}
@@ -5471,7 +6086,7 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
int rm = instr->GetRm();
// The immediate post index addressing mode is indicated by rm = 31.
// The immediate is implied by the number of vector registers used.
- addr_base += (rm == 31) ? RegisterSizeInBytesFromFormat(vf) * count
+ addr_base += (rm == 31) ? (RegisterSizeInBytesFromFormat(vf) * reg_count)
: ReadXRegister(rm);
WriteXRegister(instr->GetRn(), addr_base);
} else {
@@ -5507,6 +6122,8 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
// and PostIndex addressing.
bool do_load = false;
+ bool replicating = false;
+
NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
VectorFormat vf_t = nfd.GetVectorFormat();
@@ -5581,99 +6198,67 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
}
case NEON_LD1R:
- case NEON_LD1R_post: {
- vf = vf_t;
- ld1r(vf, ReadVRegister(rt), addr);
- do_load = true;
- break;
- }
-
+ case NEON_LD1R_post:
case NEON_LD2R:
- case NEON_LD2R_post: {
- vf = vf_t;
- int rt2 = (rt + 1) % kNumberOfVRegisters;
- ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr);
- do_load = true;
- break;
- }
-
+ case NEON_LD2R_post:
case NEON_LD3R:
- case NEON_LD3R_post: {
- vf = vf_t;
- int rt2 = (rt + 1) % kNumberOfVRegisters;
- int rt3 = (rt2 + 1) % kNumberOfVRegisters;
- ld3r(vf, ReadVRegister(rt), ReadVRegister(rt2), ReadVRegister(rt3), addr);
- do_load = true;
- break;
- }
-
+ case NEON_LD3R_post:
case NEON_LD4R:
- case NEON_LD4R_post: {
+ case NEON_LD4R_post:
vf = vf_t;
- int rt2 = (rt + 1) % kNumberOfVRegisters;
- int rt3 = (rt2 + 1) % kNumberOfVRegisters;
- int rt4 = (rt3 + 1) % kNumberOfVRegisters;
- ld4r(vf,
- ReadVRegister(rt),
- ReadVRegister(rt2),
- ReadVRegister(rt3),
- ReadVRegister(rt4),
- addr);
do_load = true;
+ replicating = true;
break;
- }
+
default:
VIXL_UNIMPLEMENTED();
}
- PrintRegisterFormat print_format =
- GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
- // Make sure that the print_format only includes a single lane.
- print_format =
- static_cast<PrintRegisterFormat>(print_format & ~kPrintRegAsVectorMask);
-
- int esize = LaneSizeInBytesFromFormat(vf);
int index_shift = LaneSizeInBytesLog2FromFormat(vf);
int lane = instr->GetNEONLSIndex(index_shift);
- int scale = 0;
+ int reg_count = 0;
int rt2 = (rt + 1) % kNumberOfVRegisters;
int rt3 = (rt2 + 1) % kNumberOfVRegisters;
int rt4 = (rt3 + 1) % kNumberOfVRegisters;
switch (instr->Mask(NEONLoadStoreSingleLenMask)) {
case NEONLoadStoreSingle1:
- scale = 1;
- if (do_load) {
+ reg_count = 1;
+ if (replicating) {
+ VIXL_ASSERT(do_load);
+ ld1r(vf, ReadVRegister(rt), addr);
+ } else if (do_load) {
ld1(vf, ReadVRegister(rt), lane, addr);
- LogVRead(addr, rt, print_format, lane);
} else {
st1(vf, ReadVRegister(rt), lane, addr);
- LogVWrite(addr, rt, print_format, lane);
}
break;
case NEONLoadStoreSingle2:
- scale = 2;
- if (do_load) {
+ reg_count = 2;
+ if (replicating) {
+ VIXL_ASSERT(do_load);
+ ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr);
+ } else if (do_load) {
ld2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
- LogVRead(addr, rt, print_format, lane);
- LogVRead(addr + esize, rt2, print_format, lane);
} else {
st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
- LogVWrite(addr, rt, print_format, lane);
- LogVWrite(addr + esize, rt2, print_format, lane);
}
break;
case NEONLoadStoreSingle3:
- scale = 3;
- if (do_load) {
+ reg_count = 3;
+ if (replicating) {
+ VIXL_ASSERT(do_load);
+ ld3r(vf,
+ ReadVRegister(rt),
+ ReadVRegister(rt2),
+ ReadVRegister(rt3),
+ addr);
+ } else if (do_load) {
ld3(vf,
ReadVRegister(rt),
ReadVRegister(rt2),
ReadVRegister(rt3),
lane,
addr);
- LogVRead(addr, rt, print_format, lane);
- LogVRead(addr + esize, rt2, print_format, lane);
- LogVRead(addr + (2 * esize), rt3, print_format, lane);
} else {
st3(vf,
ReadVRegister(rt),
@@ -5681,14 +6266,19 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
ReadVRegister(rt3),
lane,
addr);
- LogVWrite(addr, rt, print_format, lane);
- LogVWrite(addr + esize, rt2, print_format, lane);
- LogVWrite(addr + (2 * esize), rt3, print_format, lane);
}
break;
case NEONLoadStoreSingle4:
- scale = 4;
- if (do_load) {
+ reg_count = 4;
+ if (replicating) {
+ VIXL_ASSERT(do_load);
+ ld4r(vf,
+ ReadVRegister(rt),
+ ReadVRegister(rt2),
+ ReadVRegister(rt3),
+ ReadVRegister(rt4),
+ addr);
+ } else if (do_load) {
ld4(vf,
ReadVRegister(rt),
ReadVRegister(rt2),
@@ -5696,10 +6286,6 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
ReadVRegister(rt4),
lane,
addr);
- LogVRead(addr, rt, print_format, lane);
- LogVRead(addr + esize, rt2, print_format, lane);
- LogVRead(addr + (2 * esize), rt3, print_format, lane);
- LogVRead(addr + (3 * esize), rt4, print_format, lane);
} else {
st4(vf,
ReadVRegister(rt),
@@ -5708,22 +6294,38 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
ReadVRegister(rt4),
lane,
addr);
- LogVWrite(addr, rt, print_format, lane);
- LogVWrite(addr + esize, rt2, print_format, lane);
- LogVWrite(addr + (2 * esize), rt3, print_format, lane);
- LogVWrite(addr + (3 * esize), rt4, print_format, lane);
}
break;
default:
VIXL_UNIMPLEMENTED();
}
+ // Trace registers and/or memory writes.
+ PrintRegisterFormat print_format =
+ GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
+ if (do_load) {
+ if (ShouldTraceVRegs()) {
+ if (replicating) {
+ PrintVReplicatingStructAccess(rt, reg_count, print_format, "<-", addr);
+ } else {
+ PrintVSingleStructAccess(rt, reg_count, lane, print_format, "<-", addr);
+ }
+ }
+ } else {
+ if (ShouldTraceWrites()) {
+ // Stores don't represent a change to the source register's value, so only
+ // print the relevant part of the value.
+ print_format = GetPrintRegPartial(print_format);
+ PrintVSingleStructAccess(rt, reg_count, lane, print_format, "->", addr);
+ }
+ }
+
if (addr_mode == PostIndex) {
int rm = instr->GetRm();
int lane_size = LaneSizeInBytesFromFormat(vf);
WriteXRegister(instr->GetRn(),
- addr +
- ((rm == 31) ? (scale * lane_size) : ReadXRegister(rm)));
+ addr + ((rm == 31) ? (reg_count * lane_size)
+ : ReadXRegister(rm)));
}
}
@@ -6421,10 +7023,10 @@ void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) {
NEONFormatDecoder nfd(instr, &map);
VectorFormat vf = nfd.GetVectorFormat();
- int highestSetBit = HighestSetBitPosition(instr->GetImmNEONImmh());
- int immhimmb = instr->GetImmNEONImmhImmb();
- int right_shift = (16 << highestSetBit) - immhimmb;
- int left_shift = immhimmb - (8 << highestSetBit);
+ int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
+ int immh_immb = instr->GetImmNEONImmhImmb();
+ int right_shift = (16 << highest_set_bit) - immh_immb;
+ int left_shift = immh_immb - (8 << highest_set_bit);
switch (instr->Mask(NEONScalarShiftImmediateMask)) {
case NEON_SHL_scalar:
shl(vf, rd, rn, left_shift);
@@ -6529,10 +7131,10 @@ void Simulator::VisitNEONShiftImmediate(const Instruction* instr) {
{NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}};
VectorFormat vf_l = nfd.GetVectorFormat(&map_l);
- int highestSetBit = HighestSetBitPosition(instr->GetImmNEONImmh());
- int immhimmb = instr->GetImmNEONImmhImmb();
- int right_shift = (16 << highestSetBit) - immhimmb;
- int left_shift = immhimmb - (8 << highestSetBit);
+ int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
+ int immh_immb = instr->GetImmNEONImmhImmb();
+ int right_shift = (16 << highest_set_bit) - immh_immb;
+ int left_shift = immh_immb - (8 << highest_set_bit);
switch (instr->Mask(NEONShiftImmediateMask)) {
case NEON_SHL:
@@ -6741,6 +7343,4356 @@ void Simulator::VisitNEONPerm(const Instruction* instr) {
}
}
+void Simulator::VisitSVEAddressGeneration(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+ SimVRegister temp;
+
+ VectorFormat vform = kFormatVnD;
+ mov(vform, temp, zm);
+
+ switch (instr->Mask(SVEAddressGenerationMask)) {
+ case ADR_z_az_d_s32_scaled:
+ sxt(vform, temp, temp, kSRegSize);
+ break;
+ case ADR_z_az_d_u32_scaled:
+ uxt(vform, temp, temp, kSRegSize);
+ break;
+ case ADR_z_az_s_same_scaled:
+ vform = kFormatVnS;
+ break;
+ case ADR_z_az_d_same_scaled:
+ // Nothing to do.
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ int shift_amount = instr->ExtractBits(11, 10);
+ shl(vform, temp, temp, shift_amount);
+ add(vform, zd, zn, temp);
+}
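Per lane, the ADR forms above reduce to extend, shift, add. A scalar sketch of one D-sized lane (the function name, enum, and parameters are illustrative assumptions, not part of the patch):

#include <cstdint>

// One D-sized lane of SVE ADR: zd = zn + (extend(zm) << shift). The extension
// is a sign- or zero-extension of the low 32 bits of zm, or no extension.
enum class AdrExtend { kNone, kSxtw, kUxtw };

uint64_t AdrLane(uint64_t zn, uint64_t zm, AdrExtend extend, int shift) {
  uint64_t offset = zm;
  if (extend == AdrExtend::kSxtw) {
    int32_t low = static_cast<int32_t>(zm & 0xffffffff);
    offset = static_cast<uint64_t>(static_cast<int64_t>(low));
  } else if (extend == AdrExtend::kUxtw) {
    offset = zm & 0xffffffff;
  }
  return zn + (offset << shift);  // `shift` is 0-3, from bits <11:10>.
}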
+
+void Simulator::VisitSVEBitwiseLogicalWithImm_Unpredicated(
+ const Instruction* instr) {
+ Instr op = instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask);
+ switch (op) {
+ case AND_z_zi:
+ case EOR_z_zi:
+ case ORR_z_zi: {
+ int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
+ uint64_t imm = instr->GetSVEImmLogical();
+      // A valid immediate is a non-zero bit pattern.
+ VIXL_ASSERT(imm != 0);
+ SVEBitwiseImmHelper(static_cast<SVEBitwiseLogicalWithImm_UnpredicatedOp>(
+ op),
+ SVEFormatFromLaneSizeInBytesLog2(lane_size),
+ ReadVRegister(instr->GetRd()),
+ imm);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEBroadcastBitmaskImm(const Instruction* instr) {
+ switch (instr->Mask(SVEBroadcastBitmaskImmMask)) {
+ case DUPM_z_i: {
+      // DUPM uses the same lane size and immediate encoding as bitwise
+      // logical immediate instructions.
+ int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
+ uint64_t imm = instr->GetSVEImmLogical();
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+ dup_immediate(vform, ReadVRegister(instr->GetRd()), imm);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+ Instr op = instr->Mask(SVEBitwiseLogicalUnpredicatedMask);
+
+ LogicalOp logical_op;
+ switch (op) {
+ case AND_z_zz:
+ logical_op = AND;
+ break;
+ case BIC_z_zz:
+ logical_op = BIC;
+ break;
+ case EOR_z_zz:
+ logical_op = EOR;
+ break;
+ case ORR_z_zz:
+ logical_op = ORR;
+ break;
+ default:
+ logical_op = LogicalOpMask;
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+  // The lane size is irrelevant to these bitwise operations, so perform the
+  // operation on D-sized lanes.
+ SVEBitwiseLogicalUnpredicatedHelper(logical_op, kFormatVnD, zd, zn, zm);
+}
+
+void Simulator::VisitSVEBitwiseShiftByImm_Predicated(const Instruction* instr) {
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ SimVRegister scratch;
+ SimVRegister result;
+
+ bool for_division = false;
+ Shift shift_op = NO_SHIFT;
+ switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
+ case ASRD_z_p_zi:
+ shift_op = ASR;
+ for_division = true;
+ break;
+ case ASR_z_p_zi:
+ shift_op = ASR;
+ break;
+ case LSL_z_p_zi:
+ shift_op = LSL;
+ break;
+ case LSR_z_p_zi:
+ shift_op = LSR;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
+ unsigned lane_size = shift_and_lane_size.second;
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+ int shift_dist = shift_and_lane_size.first;
+
+ if ((shift_op == ASR) && for_division) {
+ asrd(vform, result, zdn, shift_dist);
+ } else {
+ if (shift_op == LSL) {
+ // Shift distance is computed differently for LSL. Convert the result.
+ shift_dist = (8 << lane_size) - shift_dist;
+ }
+ dup_immediate(vform, scratch, shift_dist);
+ SVEBitwiseShiftHelper(shift_op, vform, result, zdn, scratch, false);
+ }
+ mov_merging(vform, zdn, pg, result);
+}
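A small illustration of the LSL conversion above (helper name assumed, not from the patch): ASR and LSR use the decoded distance directly, while the LSL amount is the lane size in bits minus the decoded value.

// For LSL, the decoded shift distance counts down from the lane size in bits.
int LslAmountFromDecoded(int decoded_shift_dist, unsigned lane_size_in_bytes_log2) {
  int lane_size_in_bits = 8 << lane_size_in_bytes_log2;
  return lane_size_in_bits - decoded_shift_dist;  // e.g. S lanes: 32 - 29 = 3.
}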
+
+void Simulator::VisitSVEBitwiseShiftByVector_Predicated(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ SimVRegister result;
+ SimVRegister shiftand; // Vector to be shifted.
+ SimVRegister shiftor; // Vector shift amount.
+
+ Shift shift_op = ASR;
+ mov(vform, shiftand, zdn);
+ mov(vform, shiftor, zm);
+
+ switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) {
+ case ASRR_z_p_zz:
+ mov(vform, shiftand, zm);
+ mov(vform, shiftor, zdn);
+ VIXL_FALLTHROUGH();
+ case ASR_z_p_zz:
+ break;
+ case LSLR_z_p_zz:
+ mov(vform, shiftand, zm);
+ mov(vform, shiftor, zdn);
+ VIXL_FALLTHROUGH();
+ case LSL_z_p_zz:
+ shift_op = LSL;
+ break;
+ case LSRR_z_p_zz:
+ mov(vform, shiftand, zm);
+ mov(vform, shiftor, zdn);
+ VIXL_FALLTHROUGH();
+ case LSR_z_p_zz:
+ shift_op = LSR;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ SVEBitwiseShiftHelper(shift_op,
+ vform,
+ result,
+ shiftand,
+ shiftor,
+ /* is_wide_elements = */ false);
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::VisitSVEBitwiseShiftByWideElements_Predicated(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ SimVRegister result;
+ Shift shift_op = ASR;
+
+ switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) {
+ case ASR_z_p_zw:
+ break;
+ case LSL_z_p_zw:
+ shift_op = LSL;
+ break;
+ case LSR_z_p_zw:
+ shift_op = LSR;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ SVEBitwiseShiftHelper(shift_op,
+ vform,
+ result,
+ zdn,
+ zm,
+ /* is_wide_elements = */ true);
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+ Shift shift_op;
+ switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
+ case ASR_z_zi:
+ case ASR_z_zw:
+ shift_op = ASR;
+ break;
+ case LSL_z_zi:
+ case LSL_z_zw:
+ shift_op = LSL;
+ break;
+ case LSR_z_zi:
+ case LSR_z_zw:
+ shift_op = LSR;
+ break;
+ default:
+ shift_op = NO_SHIFT;
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
+ case ASR_z_zi:
+ case LSL_z_zi:
+ case LSR_z_zi: {
+ SimVRegister scratch;
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+ unsigned lane_size = shift_and_lane_size.second;
+ VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+ int shift_dist = shift_and_lane_size.first;
+ if (shift_op == LSL) {
+ // Shift distance is computed differently for LSL. Convert the result.
+ shift_dist = (8 << lane_size) - shift_dist;
+ }
+ dup_immediate(vform, scratch, shift_dist);
+ SVEBitwiseShiftHelper(shift_op, vform, zd, zn, scratch, false);
+ break;
+ }
+ case ASR_z_zw:
+ case LSL_z_zw:
+ case LSR_z_zw:
+ SVEBitwiseShiftHelper(shift_op,
+ instr->GetSVEVectorFormat(),
+ zd,
+ zn,
+ ReadVRegister(instr->GetRm()),
+ true);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEIncDecRegisterByElementCount(const Instruction* instr) {
+ // Although the instructions have a separate encoding class, the lane size is
+ // encoded in the same way as most other SVE instructions.
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ int pattern = instr->GetImmSVEPredicateConstraint();
+ int count = GetPredicateConstraintLaneCount(vform, pattern);
+ int multiplier = instr->ExtractBits(19, 16) + 1;
+
+ switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) {
+ case DECB_r_rs:
+ case DECD_r_rs:
+ case DECH_r_rs:
+ case DECW_r_rs:
+ count = -count;
+ break;
+ case INCB_r_rs:
+ case INCD_r_rs:
+ case INCH_r_rs:
+ case INCW_r_rs:
+ // Nothing to do.
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ return;
+ }
+
+ WriteXRegister(instr->GetRd(),
+ IncDecN(ReadXRegister(instr->GetRd()),
+ count * multiplier,
+ kXRegSize));
+}
+
+void Simulator::VisitSVEIncDecVectorByElementCount(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
+ VIXL_UNIMPLEMENTED();
+ }
+
+ int pattern = instr->GetImmSVEPredicateConstraint();
+ int count = GetPredicateConstraintLaneCount(vform, pattern);
+ int multiplier = instr->ExtractBits(19, 16) + 1;
+
+ switch (instr->Mask(SVEIncDecVectorByElementCountMask)) {
+ case DECD_z_zs:
+ case DECH_z_zs:
+ case DECW_z_zs:
+ count = -count;
+ break;
+ case INCD_z_zs:
+ case INCH_z_zs:
+ case INCW_z_zs:
+ // Nothing to do.
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister scratch;
+ dup_immediate(vform,
+ scratch,
+ IncDecN(0,
+ count * multiplier,
+ LaneSizeInBitsFromFormat(vform)));
+ add(vform, zd, zd, scratch);
+}
+
+void Simulator::VisitSVESaturatingIncDecRegisterByElementCount(
+ const Instruction* instr) {
+ // Although the instructions have a separate encoding class, the lane size is
+ // encoded in the same way as most other SVE instructions.
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ int pattern = instr->GetImmSVEPredicateConstraint();
+ int count = GetPredicateConstraintLaneCount(vform, pattern);
+ int multiplier = instr->ExtractBits(19, 16) + 1;
+
+ unsigned width = kXRegSize;
+ bool is_signed = false;
+
+ switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) {
+ case SQDECB_r_rs_sx:
+ case SQDECD_r_rs_sx:
+ case SQDECH_r_rs_sx:
+ case SQDECW_r_rs_sx:
+ width = kWRegSize;
+ VIXL_FALLTHROUGH();
+ case SQDECB_r_rs_x:
+ case SQDECD_r_rs_x:
+ case SQDECH_r_rs_x:
+ case SQDECW_r_rs_x:
+ is_signed = true;
+ count = -count;
+ break;
+ case SQINCB_r_rs_sx:
+ case SQINCD_r_rs_sx:
+ case SQINCH_r_rs_sx:
+ case SQINCW_r_rs_sx:
+ width = kWRegSize;
+ VIXL_FALLTHROUGH();
+ case SQINCB_r_rs_x:
+ case SQINCD_r_rs_x:
+ case SQINCH_r_rs_x:
+ case SQINCW_r_rs_x:
+ is_signed = true;
+ break;
+ case UQDECB_r_rs_uw:
+ case UQDECD_r_rs_uw:
+ case UQDECH_r_rs_uw:
+ case UQDECW_r_rs_uw:
+ width = kWRegSize;
+ VIXL_FALLTHROUGH();
+ case UQDECB_r_rs_x:
+ case UQDECD_r_rs_x:
+ case UQDECH_r_rs_x:
+ case UQDECW_r_rs_x:
+ count = -count;
+ break;
+ case UQINCB_r_rs_uw:
+ case UQINCD_r_rs_uw:
+ case UQINCH_r_rs_uw:
+ case UQINCW_r_rs_uw:
+ width = kWRegSize;
+ VIXL_FALLTHROUGH();
+ case UQINCB_r_rs_x:
+ case UQINCD_r_rs_x:
+ case UQINCH_r_rs_x:
+ case UQINCW_r_rs_x:
+ // Nothing to do.
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ WriteXRegister(instr->GetRd(),
+ IncDecN(ReadXRegister(instr->GetRd()),
+ count * multiplier,
+ width,
+ true,
+ is_signed));
+}
+
+void Simulator::VisitSVESaturatingIncDecVectorByElementCount(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
+ VIXL_UNIMPLEMENTED();
+ }
+
+ int pattern = instr->GetImmSVEPredicateConstraint();
+ int count = GetPredicateConstraintLaneCount(vform, pattern);
+ int multiplier = instr->ExtractBits(19, 16) + 1;
+
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister scratch;
+ dup_immediate(vform,
+ scratch,
+ IncDecN(0,
+ count * multiplier,
+ LaneSizeInBitsFromFormat(vform)));
+
+ switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) {
+ case SQDECD_z_zs:
+ case SQDECH_z_zs:
+ case SQDECW_z_zs:
+ sub(vform, zd, zd, scratch).SignedSaturate(vform);
+ break;
+ case SQINCD_z_zs:
+ case SQINCH_z_zs:
+ case SQINCW_z_zs:
+ add(vform, zd, zd, scratch).SignedSaturate(vform);
+ break;
+ case UQDECD_z_zs:
+ case UQDECH_z_zs:
+ case UQDECW_z_zs:
+ sub(vform, zd, zd, scratch).UnsignedSaturate(vform);
+ break;
+ case UQINCD_z_zs:
+ case UQINCH_z_zs:
+ case UQINCW_z_zs:
+ add(vform, zd, zd, scratch).UnsignedSaturate(vform);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEElementCount(const Instruction* instr) {
+ switch (instr->Mask(SVEElementCountMask)) {
+ case CNTB_r_s:
+ case CNTD_r_s:
+ case CNTH_r_s:
+ case CNTW_r_s:
+ // All handled below.
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+  // Although these instructions have a separate encoding class, the lane size
+  // is encoded in the same way as most other SVE instructions.
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ int pattern = instr->GetImmSVEPredicateConstraint();
+ int count = GetPredicateConstraintLaneCount(vform, pattern);
+ int multiplier = instr->ExtractBits(19, 16) + 1;
+ WriteXRegister(instr->GetRd(), count * multiplier);
+}
+
+void Simulator::VisitSVEFPAccumulatingReduction(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& vdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ switch (instr->Mask(SVEFPAccumulatingReductionMask)) {
+ case FADDA_v_p_z:
+ fadda(vform, vdn, pg, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEFPArithmetic_Predicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ SimVRegister result;
+
+ switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) {
+ case FABD_z_p_zz:
+ fabd(vform, result, zdn, zm);
+ break;
+ case FADD_z_p_zz:
+ fadd(vform, result, zdn, zm);
+ break;
+ case FDIVR_z_p_zz:
+ fdiv(vform, result, zm, zdn);
+ break;
+ case FDIV_z_p_zz:
+ fdiv(vform, result, zdn, zm);
+ break;
+ case FMAXNM_z_p_zz:
+ fmaxnm(vform, result, zdn, zm);
+ break;
+ case FMAX_z_p_zz:
+ fmax(vform, result, zdn, zm);
+ break;
+ case FMINNM_z_p_zz:
+ fminnm(vform, result, zdn, zm);
+ break;
+ case FMIN_z_p_zz:
+ fmin(vform, result, zdn, zm);
+ break;
+ case FMULX_z_p_zz:
+ fmulx(vform, result, zdn, zm);
+ break;
+ case FMUL_z_p_zz:
+ fmul(vform, result, zdn, zm);
+ break;
+ case FSCALE_z_p_zz:
+ fscale(vform, result, zdn, zm);
+ break;
+ case FSUBR_z_p_zz:
+ fsub(vform, result, zm, zdn);
+ break;
+ case FSUB_z_p_zz:
+ fsub(vform, result, zdn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::VisitSVEFPArithmeticWithImm_Predicated(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
+ VIXL_UNIMPLEMENTED();
+ }
+
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister result;
+
+ int i1 = instr->ExtractBit(5);
+ SimVRegister add_sub_imm, min_max_imm, mul_imm;
+ uint64_t half = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 0.5);
+ uint64_t one = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 1.0);
+ uint64_t two = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 2.0);
+ dup_immediate(vform, add_sub_imm, i1 ? one : half);
+ dup_immediate(vform, min_max_imm, i1 ? one : 0);
+ dup_immediate(vform, mul_imm, i1 ? two : half);
+
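+  // Each form of these instructions selects between two hard-wired immediates
+  // using the i1 bit; for example, FADD adds 0.5 when i1 is zero and 1.0 when
+  // i1 is one.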
+ switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) {
+ case FADD_z_p_zs:
+ fadd(vform, result, zdn, add_sub_imm);
+ break;
+ case FMAXNM_z_p_zs:
+ fmaxnm(vform, result, zdn, min_max_imm);
+ break;
+ case FMAX_z_p_zs:
+ fmax(vform, result, zdn, min_max_imm);
+ break;
+ case FMINNM_z_p_zs:
+ fminnm(vform, result, zdn, min_max_imm);
+ break;
+ case FMIN_z_p_zs:
+ fmin(vform, result, zdn, min_max_imm);
+ break;
+ case FMUL_z_p_zs:
+ fmul(vform, result, zdn, mul_imm);
+ break;
+ case FSUBR_z_p_zs:
+ fsub(vform, result, add_sub_imm, zdn);
+ break;
+ case FSUB_z_p_zs:
+ fsub(vform, result, zdn, add_sub_imm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::VisitSVEFPTrigMulAddCoefficient(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+
+ switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) {
+ case FTMAD_z_zzi:
+ ftmad(vform, zd, zd, zm, instr->ExtractBits(18, 16));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEFPArithmeticUnpredicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+
+ switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) {
+ case FADD_z_zz:
+ fadd(vform, zd, zn, zm);
+ break;
+ case FMUL_z_zz:
+ fmul(vform, zd, zn, zm);
+ break;
+ case FRECPS_z_zz:
+ frecps(vform, zd, zn, zm);
+ break;
+ case FRSQRTS_z_zz:
+ frsqrts(vform, zd, zn, zm);
+ break;
+ case FSUB_z_zz:
+ fsub(vform, zd, zn, zm);
+ break;
+ case FTSMUL_z_zz:
+ ftsmul(vform, zd, zn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEFPCompareVectors(const Instruction* instr) {
+ SimPRegister& pd = ReadPRegister(instr->GetPd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister result;
+
+ switch (instr->Mask(SVEFPCompareVectorsMask)) {
+ case FACGE_p_p_zz:
+ fabscmp(vform, result, zn, zm, ge);
+ break;
+ case FACGT_p_p_zz:
+ fabscmp(vform, result, zn, zm, gt);
+ break;
+ case FCMEQ_p_p_zz:
+ fcmp(vform, result, zn, zm, eq);
+ break;
+ case FCMGE_p_p_zz:
+ fcmp(vform, result, zn, zm, ge);
+ break;
+ case FCMGT_p_p_zz:
+ fcmp(vform, result, zn, zm, gt);
+ break;
+ case FCMNE_p_p_zz:
+ fcmp(vform, result, zn, zm, ne);
+ break;
+ case FCMUO_p_p_zz:
+ fcmp(vform, result, zn, zm, uo);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ ExtractFromSimVRegister(vform, pd, result);
+ mov_zeroing(pd, pg, pd);
+}
+
+void Simulator::VisitSVEFPCompareWithZero(const Instruction* instr) {
+ SimPRegister& pd = ReadPRegister(instr->GetPd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister result;
+
+ SimVRegister zeros;
+ dup_immediate(kFormatVnD, zeros, 0);
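+  // The lane size used here is arbitrary; zero has the same representation in
+  // every format.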
+
+ switch (instr->Mask(SVEFPCompareWithZeroMask)) {
+ case FCMEQ_p_p_z0:
+ fcmp(vform, result, zn, zeros, eq);
+ break;
+ case FCMGE_p_p_z0:
+ fcmp(vform, result, zn, zeros, ge);
+ break;
+ case FCMGT_p_p_z0:
+ fcmp(vform, result, zn, zeros, gt);
+ break;
+ case FCMLE_p_p_z0:
+ fcmp(vform, result, zn, zeros, le);
+ break;
+ case FCMLT_p_p_z0:
+ fcmp(vform, result, zn, zeros, lt);
+ break;
+ case FCMNE_p_p_z0:
+ fcmp(vform, result, zn, zeros, ne);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ ExtractFromSimVRegister(vform, pd, result);
+ mov_zeroing(pd, pg, pd);
+}
+
+void Simulator::VisitSVEFPComplexAddition(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
+ VIXL_UNIMPLEMENTED();
+ }
+
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ int rot = instr->ExtractBit(16);
+
+ SimVRegister result;
+
+ switch (instr->Mask(SVEFPComplexAdditionMask)) {
+ case FCADD_z_p_zz:
+ fcadd(vform, result, zdn, zm, rot);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::VisitSVEFPComplexMulAdd(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
+ VIXL_UNIMPLEMENTED();
+ }
+
+ SimVRegister& zda = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ int rot = instr->ExtractBits(14, 13);
+
+ SimVRegister result;
+
+ switch (instr->Mask(SVEFPComplexMulAddMask)) {
+ case FCMLA_z_p_zzz:
+ fcmla(vform, result, zn, zm, zda, rot);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zda, pg, result);
+}
+
+void Simulator::VisitSVEFPComplexMulAddIndex(const Instruction* instr) {
+ SimVRegister& zda = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ int rot = instr->ExtractBits(11, 10);
+ unsigned zm_code = instr->GetRm();
+ int index = -1;
+ VectorFormat vform, vform_dup;
+
+ switch (instr->Mask(SVEFPComplexMulAddIndexMask)) {
+ case FCMLA_z_zzzi_h:
+ vform = kFormatVnH;
+ vform_dup = kFormatVnS;
+ index = zm_code >> 3;
+ zm_code &= 0x7;
+ break;
+ case FCMLA_z_zzzi_s:
+ vform = kFormatVnS;
+ vform_dup = kFormatVnD;
+ index = zm_code >> 4;
+ zm_code &= 0xf;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ if (index >= 0) {
+ SimVRegister temp;
+ dup_elements_to_segments(vform_dup, temp, ReadVRegister(zm_code), index);
+ fcmla(vform, zda, zn, temp, zda, rot);
+ }
+}
+
+typedef LogicVRegister (Simulator::*FastReduceFn)(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+
+void Simulator::VisitSVEFPFastReduction(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& vd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ int lane_size = LaneSizeInBitsFromFormat(vform);
+
+ uint64_t inactive_value = 0;
+ FastReduceFn fn = nullptr;
+
+ switch (instr->Mask(SVEFPFastReductionMask)) {
+ case FADDV_v_p_z:
+ fn = &Simulator::faddv;
+ break;
+ case FMAXNMV_v_p_z:
+ inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
+ fn = &Simulator::fmaxnmv;
+ break;
+ case FMAXV_v_p_z:
+ inactive_value = FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
+ fn = &Simulator::fmaxv;
+ break;
+ case FMINNMV_v_p_z:
+ inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
+ fn = &Simulator::fminnmv;
+ break;
+ case FMINV_v_p_z:
+ inactive_value = FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
+ fn = &Simulator::fminv;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
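+  // Inactive lanes are replaced with a value that cannot affect the result of
+  // the reduction (for example, -infinity for FMAXV), then the whole register
+  // is reduced.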
+ SimVRegister scratch;
+ dup_immediate(vform, scratch, inactive_value);
+ mov_merging(vform, scratch, pg, zn);
+ if (fn != nullptr) (this->*fn)(vform, vd, scratch);
+}
+
+void Simulator::VisitSVEFPMulIndex(const Instruction* instr) {
+ VectorFormat vform = kFormatUndefined;
+ unsigned zm_code = instr->GetRm() & 0xf;
+ unsigned index = instr->ExtractBits(20, 19);
+
+ switch (instr->Mask(SVEFPMulIndexMask)) {
+ case FMUL_z_zzi_d:
+ vform = kFormatVnD;
+ index >>= 1; // Only bit 20 is the index for D lanes.
+ break;
+ case FMUL_z_zzi_h_i3h:
+ index += 4; // Bit 22 (i3h) is the top bit of index.
+ VIXL_FALLTHROUGH();
+ case FMUL_z_zzi_h:
+ vform = kFormatVnH;
+ zm_code &= 7; // Three bits used for zm.
+ break;
+ case FMUL_z_zzi_s:
+ vform = kFormatVnS;
+ zm_code &= 7; // Three bits used for zm.
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister temp;
+
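+  // The indexed element of zm is duplicated within each 128-bit segment of a
+  // temporary before the multiply; for example, index 3 with S lanes selects
+  // the fourth S-sized lane of every segment.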
+ dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index);
+ fmul(vform, zd, zn, temp);
+}
+
+void Simulator::VisitSVEFPMulAdd(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister result;
+
+ if (instr->ExtractBit(15) == 0) {
+ // Floating-point multiply-accumulate writing addend.
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+ switch (instr->Mask(SVEFPMulAddMask)) {
+ // zda = zda + zn * zm
+ case FMLA_z_p_zzz:
+ fmla(vform, result, zd, zn, zm);
+ break;
+ // zda = -zda + -zn * zm
+ case FNMLA_z_p_zzz:
+ fneg(vform, result, zd);
+ fmls(vform, result, result, zn, zm);
+ break;
+ // zda = zda + -zn * zm
+ case FMLS_z_p_zzz:
+ fmls(vform, result, zd, zn, zm);
+ break;
+ // zda = -zda + zn * zm
+ case FNMLS_z_p_zzz:
+ fneg(vform, result, zd);
+ fmla(vform, result, result, zn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ } else {
+ // Floating-point multiply-accumulate writing multiplicand.
+ SimVRegister& za = ReadVRegister(instr->GetRm());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+
+ switch (instr->Mask(SVEFPMulAddMask)) {
+ // zdn = za + zdn * zm
+ case FMAD_z_p_zzz:
+ fmla(vform, result, za, zd, zm);
+ break;
+ // zdn = -za + -zdn * zm
+ case FNMAD_z_p_zzz:
+ fneg(vform, result, za);
+ fmls(vform, result, result, zd, zm);
+ break;
+ // zdn = za + -zdn * zm
+ case FMSB_z_p_zzz:
+ fmls(vform, result, za, zd, zm);
+ break;
+ // zdn = -za + zdn * zm
+ case FNMSB_z_p_zzz:
+ fneg(vform, result, za);
+ fmla(vform, result, result, zd, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ }
+
+ mov_merging(vform, zd, pg, result);
+}
+
+void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) {
+ VectorFormat vform = kFormatUndefined;
+ unsigned zm_code = 0xffffffff;
+ unsigned index = 0xffffffff;
+
+ switch (instr->Mask(SVEFPMulAddIndexMask)) {
+ case FMLA_z_zzzi_d:
+ case FMLS_z_zzzi_d:
+ vform = kFormatVnD;
+ zm_code = instr->GetRmLow16();
+ // Only bit 20 is the index for D lanes.
+ index = instr->ExtractBit(20);
+ break;
+ case FMLA_z_zzzi_s:
+ case FMLS_z_zzzi_s:
+ vform = kFormatVnS;
+ zm_code = instr->GetRm() & 0x7; // Three bits used for zm.
+ index = instr->ExtractBits(20, 19);
+ break;
+ case FMLA_z_zzzi_h:
+ case FMLS_z_zzzi_h:
+ case FMLA_z_zzzi_h_i3h:
+ case FMLS_z_zzzi_h_i3h:
+ vform = kFormatVnH;
+ zm_code = instr->GetRm() & 0x7; // Three bits used for zm.
+ index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister temp;
+
+ dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index);
+ if (instr->ExtractBit(10) == 1) {
+ fmls(vform, zd, zd, zn, temp);
+ } else {
+ fmla(vform, zd, zd, zn, temp);
+ }
+}
+
+void Simulator::VisitSVEFPConvertToInt(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ int dst_data_size;
+ int src_data_size;
+
+ switch (instr->Mask(SVEFPConvertToIntMask)) {
+ case FCVTZS_z_p_z_d2w:
+ case FCVTZU_z_p_z_d2w:
+ dst_data_size = kSRegSize;
+ src_data_size = kDRegSize;
+ break;
+ case FCVTZS_z_p_z_d2x:
+ case FCVTZU_z_p_z_d2x:
+ dst_data_size = kDRegSize;
+ src_data_size = kDRegSize;
+ break;
+ case FCVTZS_z_p_z_fp162h:
+ case FCVTZU_z_p_z_fp162h:
+ dst_data_size = kHRegSize;
+ src_data_size = kHRegSize;
+ break;
+ case FCVTZS_z_p_z_fp162w:
+ case FCVTZU_z_p_z_fp162w:
+ dst_data_size = kSRegSize;
+ src_data_size = kHRegSize;
+ break;
+ case FCVTZS_z_p_z_fp162x:
+ case FCVTZU_z_p_z_fp162x:
+ dst_data_size = kDRegSize;
+ src_data_size = kHRegSize;
+ break;
+ case FCVTZS_z_p_z_s2w:
+ case FCVTZU_z_p_z_s2w:
+ dst_data_size = kSRegSize;
+ src_data_size = kSRegSize;
+ break;
+ case FCVTZS_z_p_z_s2x:
+ case FCVTZU_z_p_z_s2x:
+ dst_data_size = kDRegSize;
+ src_data_size = kSRegSize;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ dst_data_size = 0;
+ src_data_size = 0;
+ break;
+ }
+
+ VectorFormat vform =
+ SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
+
+ if (instr->ExtractBit(16) == 0) {
+ fcvts(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
+ } else {
+ fcvtu(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
+ }
+}
+
+void Simulator::VisitSVEFPConvertPrecision(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ int dst_data_size;
+ int src_data_size;
+
+ switch (instr->Mask(SVEFPConvertPrecisionMask)) {
+ case FCVT_z_p_z_d2h:
+ dst_data_size = kHRegSize;
+ src_data_size = kDRegSize;
+ break;
+ case FCVT_z_p_z_d2s:
+ dst_data_size = kSRegSize;
+ src_data_size = kDRegSize;
+ break;
+ case FCVT_z_p_z_h2d:
+ dst_data_size = kDRegSize;
+ src_data_size = kHRegSize;
+ break;
+ case FCVT_z_p_z_h2s:
+ dst_data_size = kSRegSize;
+ src_data_size = kHRegSize;
+ break;
+ case FCVT_z_p_z_s2d:
+ dst_data_size = kDRegSize;
+ src_data_size = kSRegSize;
+ break;
+ case FCVT_z_p_z_s2h:
+ dst_data_size = kHRegSize;
+ src_data_size = kSRegSize;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ dst_data_size = 0;
+ src_data_size = 0;
+ break;
+ }
+ VectorFormat vform =
+ SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
+
+ fcvt(vform, dst_data_size, src_data_size, zd, pg, zn);
+}
+
+void Simulator::VisitSVEFPUnaryOp(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister result;
+
+ switch (instr->Mask(SVEFPUnaryOpMask)) {
+ case FRECPX_z_p_z:
+ frecpx(vform, result, zn);
+ break;
+ case FSQRT_z_p_z:
+ fsqrt(vform, result, zn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zd, pg, result);
+}
+
+void Simulator::VisitSVEFPRoundToIntegralValue(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
+ bool exact_exception = false;
+
+ switch (instr->Mask(SVEFPRoundToIntegralValueMask)) {
+ case FRINTA_z_p_z:
+ fpcr_rounding = FPTieAway;
+ break;
+ case FRINTI_z_p_z:
+ break; // Use FPCR rounding mode.
+ case FRINTM_z_p_z:
+ fpcr_rounding = FPNegativeInfinity;
+ break;
+ case FRINTN_z_p_z:
+ fpcr_rounding = FPTieEven;
+ break;
+ case FRINTP_z_p_z:
+ fpcr_rounding = FPPositiveInfinity;
+ break;
+ case FRINTX_z_p_z:
+ exact_exception = true;
+ break;
+ case FRINTZ_z_p_z:
+ fpcr_rounding = FPZero;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SimVRegister result;
+ frint(vform, result, zn, fpcr_rounding, exact_exception, kFrintToInteger);
+ mov_merging(vform, zd, pg, result);
+}
+
+void Simulator::VisitSVEIntConvertToFP(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
+ int dst_data_size;
+ int src_data_size;
+
+ switch (instr->Mask(SVEIntConvertToFPMask)) {
+ case SCVTF_z_p_z_h2fp16:
+ case UCVTF_z_p_z_h2fp16:
+ dst_data_size = kHRegSize;
+ src_data_size = kHRegSize;
+ break;
+ case SCVTF_z_p_z_w2d:
+ case UCVTF_z_p_z_w2d:
+ dst_data_size = kDRegSize;
+ src_data_size = kSRegSize;
+ break;
+ case SCVTF_z_p_z_w2fp16:
+ case UCVTF_z_p_z_w2fp16:
+ dst_data_size = kHRegSize;
+ src_data_size = kSRegSize;
+ break;
+ case SCVTF_z_p_z_w2s:
+ case UCVTF_z_p_z_w2s:
+ dst_data_size = kSRegSize;
+ src_data_size = kSRegSize;
+ break;
+ case SCVTF_z_p_z_x2d:
+ case UCVTF_z_p_z_x2d:
+ dst_data_size = kDRegSize;
+ src_data_size = kDRegSize;
+ break;
+ case SCVTF_z_p_z_x2fp16:
+ case UCVTF_z_p_z_x2fp16:
+ dst_data_size = kHRegSize;
+ src_data_size = kDRegSize;
+ break;
+ case SCVTF_z_p_z_x2s:
+ case UCVTF_z_p_z_x2s:
+ dst_data_size = kSRegSize;
+ src_data_size = kDRegSize;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ dst_data_size = 0;
+ src_data_size = 0;
+ break;
+ }
+
+ VectorFormat vform =
+ SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
+
+ if (instr->ExtractBit(16) == 0) {
+ scvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
+ } else {
+ ucvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
+ }
+}
+
+void Simulator::VisitSVEFPUnaryOpUnpredicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
+
+ switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) {
+ case FRECPE_z_z:
+ frecpe(vform, zd, zn, fpcr_rounding);
+ break;
+ case FRSQRTE_z_z:
+ frsqrte(vform, zd, zn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEIncDecByPredicateCount(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimPRegister& pg = ReadPRegister(instr->ExtractBits(8, 5));
+
+ int count = CountActiveLanes(vform, pg);
+
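+  // Bit 11 distinguishes the vector forms (which update zdn) from the scalar
+  // forms (which update an X register).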
+ if (instr->ExtractBit(11) == 0) {
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
+ case DECP_z_p_z:
+ sub_uint(vform, zdn, zdn, count);
+ break;
+ case INCP_z_p_z:
+ add_uint(vform, zdn, zdn, count);
+ break;
+ case SQDECP_z_p_z:
+ sub_uint(vform, zdn, zdn, count).SignedSaturate(vform);
+ break;
+ case SQINCP_z_p_z:
+ add_uint(vform, zdn, zdn, count).SignedSaturate(vform);
+ break;
+ case UQDECP_z_p_z:
+ sub_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
+ break;
+ case UQINCP_z_p_z:
+ add_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ } else {
+ bool is_saturating = (instr->ExtractBit(18) == 0);
+ bool decrement =
+ is_saturating ? instr->ExtractBit(17) : instr->ExtractBit(16);
+ bool is_signed = (instr->ExtractBit(16) == 0);
+ bool sf = is_saturating ? (instr->ExtractBit(10) != 0) : true;
+ unsigned width = sf ? kXRegSize : kWRegSize;
+
+ switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
+ case DECP_r_p_r:
+ case INCP_r_p_r:
+ case SQDECP_r_p_r_sx:
+ case SQDECP_r_p_r_x:
+ case SQINCP_r_p_r_sx:
+ case SQINCP_r_p_r_x:
+ case UQDECP_r_p_r_uw:
+ case UQDECP_r_p_r_x:
+ case UQINCP_r_p_r_uw:
+ case UQINCP_r_p_r_x:
+ WriteXRegister(instr->GetRd(),
+ IncDecN(ReadXRegister(instr->GetRd()),
+ decrement ? -count : count,
+ width,
+ is_saturating,
+ is_signed));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ }
+}
+
+uint64_t Simulator::IncDecN(uint64_t acc,
+ int64_t delta,
+ unsigned n,
+ bool is_saturating,
+ bool is_signed) {
+ VIXL_ASSERT(n <= 64);
+ VIXL_ASSERT(IsIntN(n, delta));
+
+ uint64_t sign_mask = UINT64_C(1) << (n - 1);
+ uint64_t mask = GetUintMask(n);
+
+ acc &= mask; // Ignore initial accumulator high bits.
+ uint64_t result = (acc + delta) & mask;
+
+ bool result_negative = ((result & sign_mask) != 0);
+
+ if (is_saturating) {
+ if (is_signed) {
+ bool acc_negative = ((acc & sign_mask) != 0);
+ bool delta_negative = delta < 0;
+
+      // If the operands have the same sign, but the result's sign differs,
+      // the addition overflowed.
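+      // For example, with n == 8, 0x7f + 1 wraps to 0x80, which is negative
+      // even though both operands were non-negative, so the result saturates
+      // to 0x7f.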
+ if ((acc_negative == delta_negative) &&
+ (acc_negative != result_negative)) {
+ if (result_negative) {
+ // Saturate to [..., INT<n>_MAX].
+ result_negative = false;
+ result = mask & ~sign_mask; // E.g. 0x000000007fffffff
+ } else {
+ // Saturate to [INT<n>_MIN, ...].
+ result_negative = true;
+ result = ~mask | sign_mask; // E.g. 0xffffffff80000000
+ }
+ }
+ } else {
+ if ((delta < 0) && (result > acc)) {
+ // Saturate to [0, ...].
+ result = 0;
+ } else if ((delta > 0) && (result < acc)) {
+ // Saturate to [..., UINT<n>_MAX].
+ result = mask;
+ }
+ }
+ }
+
+ // Sign-extend if necessary.
+ if (result_negative && is_signed) result |= ~mask;
+
+ return result;
+}
+
+void Simulator::VisitSVEIndexGeneration(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ switch (instr->Mask(SVEIndexGenerationMask)) {
+ case INDEX_z_ii:
+ case INDEX_z_ir:
+ case INDEX_z_ri:
+ case INDEX_z_rr: {
+ uint64_t start = instr->ExtractBit(10) ? ReadXRegister(instr->GetRn())
+ : instr->ExtractSignedBits(9, 5);
+ uint64_t step = instr->ExtractBit(11) ? ReadXRegister(instr->GetRm())
+ : instr->ExtractSignedBits(20, 16);
+ index(vform, zd, start, step);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEIntArithmeticUnpredicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+ switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) {
+ case ADD_z_zz:
+ add(vform, zd, zn, zm);
+ break;
+ case SQADD_z_zz:
+ add(vform, zd, zn, zm).SignedSaturate(vform);
+ break;
+ case SQSUB_z_zz:
+ sub(vform, zd, zn, zm).SignedSaturate(vform);
+ break;
+ case SUB_z_zz:
+ sub(vform, zd, zn, zm);
+ break;
+ case UQADD_z_zz:
+ add(vform, zd, zn, zm).UnsignedSaturate(vform);
+ break;
+ case UQSUB_z_zz:
+ sub(vform, zd, zn, zm).UnsignedSaturate(vform);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEIntAddSubtractVectors_Predicated(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister result;
+
+ switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) {
+ case ADD_z_p_zz:
+ add(vform, result, zdn, zm);
+ break;
+ case SUBR_z_p_zz:
+ sub(vform, result, zm, zdn);
+ break;
+ case SUB_z_p_zz:
+ sub(vform, result, zdn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::VisitSVEBitwiseLogical_Predicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister result;
+
+ switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) {
+ case AND_z_p_zz:
+ SVEBitwiseLogicalUnpredicatedHelper(AND, vform, result, zdn, zm);
+ break;
+ case BIC_z_p_zz:
+ SVEBitwiseLogicalUnpredicatedHelper(BIC, vform, result, zdn, zm);
+ break;
+ case EOR_z_p_zz:
+ SVEBitwiseLogicalUnpredicatedHelper(EOR, vform, result, zdn, zm);
+ break;
+ case ORR_z_p_zz:
+ SVEBitwiseLogicalUnpredicatedHelper(ORR, vform, result, zdn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::VisitSVEIntMulVectors_Predicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister result;
+
+ switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) {
+ case MUL_z_p_zz:
+ mul(vform, result, zdn, zm);
+ break;
+ case SMULH_z_p_zz:
+ smulh(vform, result, zdn, zm);
+ break;
+ case UMULH_z_p_zz:
+ umulh(vform, result, zdn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::VisitSVEIntMinMaxDifference_Predicated(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister result;
+
+ switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) {
+ case SABD_z_p_zz:
+ absdiff(vform, result, zdn, zm, true);
+ break;
+ case SMAX_z_p_zz:
+ smax(vform, result, zdn, zm);
+ break;
+ case SMIN_z_p_zz:
+ smin(vform, result, zdn, zm);
+ break;
+ case UABD_z_p_zz:
+ absdiff(vform, result, zdn, zm, false);
+ break;
+ case UMAX_z_p_zz:
+ umax(vform, result, zdn, zm);
+ break;
+ case UMIN_z_p_zz:
+ umin(vform, result, zdn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::VisitSVEIntMulImm_Unpredicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister scratch;
+
+ switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) {
+ case MUL_z_zi:
+ dup_immediate(vform, scratch, instr->GetImmSVEIntWideSigned());
+ mul(vform, zd, zd, scratch);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEIntDivideVectors_Predicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister result;
+
+ VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
+
+ switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) {
+ case SDIVR_z_p_zz:
+ sdiv(vform, result, zm, zdn);
+ break;
+ case SDIV_z_p_zz:
+ sdiv(vform, result, zdn, zm);
+ break;
+ case UDIVR_z_p_zz:
+ udiv(vform, result, zm, zdn);
+ break;
+ case UDIV_z_p_zz:
+ udiv(vform, result, zdn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::VisitSVEIntMinMaxImm_Unpredicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister scratch;
+
+ uint64_t unsigned_imm = instr->GetImmSVEIntWideUnsigned();
+ int64_t signed_imm = instr->GetImmSVEIntWideSigned();
+
+ switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) {
+ case SMAX_z_zi:
+ dup_immediate(vform, scratch, signed_imm);
+ smax(vform, zd, zd, scratch);
+ break;
+ case SMIN_z_zi:
+ dup_immediate(vform, scratch, signed_imm);
+ smin(vform, zd, zd, scratch);
+ break;
+ case UMAX_z_zi:
+ dup_immediate(vform, scratch, unsigned_imm);
+ umax(vform, zd, zd, scratch);
+ break;
+ case UMIN_z_zi:
+ dup_immediate(vform, scratch, unsigned_imm);
+ umin(vform, zd, zd, scratch);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEIntCompareScalarCountAndLimit(
+ const Instruction* instr) {
+ unsigned rn_code = instr->GetRn();
+ unsigned rm_code = instr->GetRm();
+ SimPRegister& pd = ReadPRegister(instr->GetPd());
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ bool is_64_bit = instr->ExtractBit(12) == 1;
+ int64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code);
+ int64_t src2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
+
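+  // For example, "whilelt p0.s, x0, x1" with x0 == 5 and x1 == 8 sets the
+  // first three S-sized lanes of p0 and clears the rest.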
+ bool last = true;
+ for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
+ bool cond = false;
+ switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) {
+ case WHILELE_p_p_rr:
+ cond = src1 <= src2;
+ break;
+ case WHILELO_p_p_rr:
+ cond = static_cast<uint64_t>(src1) < static_cast<uint64_t>(src2);
+ break;
+ case WHILELS_p_p_rr:
+ cond = static_cast<uint64_t>(src1) <= static_cast<uint64_t>(src2);
+ break;
+ case WHILELT_p_p_rr:
+ cond = src1 < src2;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ last = last && cond;
+ LogicPRegister dst(pd);
+ dst.SetActive(vform, lane, last);
+ src1 += 1;
+ }
+
+ PredTest(vform, GetPTrue(), pd);
+ LogSystemRegister(NZCV);
+}
+
+void Simulator::VisitSVEConditionallyTerminateScalars(
+ const Instruction* instr) {
+ unsigned rn_code = instr->GetRn();
+ unsigned rm_code = instr->GetRm();
+ bool is_64_bit = instr->ExtractBit(22) == 1;
+ uint64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code);
+ uint64_t src2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
+ bool term;
+ switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) {
+ case CTERMEQ_rr:
+ term = src1 == src2;
+ break;
+ case CTERMNE_rr:
+ term = src1 != src2;
+ break;
+ default:
+ term = false;
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ ReadNzcv().SetN(term ? 1 : 0);
+ ReadNzcv().SetV(term ? 0 : !ReadC());
+ LogSystemRegister(NZCV);
+}
+
+void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) {
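+  // CMPLE and CMPLT are implemented as CMPGE and CMPGT with the operands
+  // commuted.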
+ bool commute_inputs = false;
+ Condition cond;
+ switch (instr->Mask(SVEIntCompareSignedImmMask)) {
+ case CMPEQ_p_p_zi:
+ cond = eq;
+ break;
+ case CMPGE_p_p_zi:
+ cond = ge;
+ break;
+ case CMPGT_p_p_zi:
+ cond = gt;
+ break;
+ case CMPLE_p_p_zi:
+ cond = ge;
+ commute_inputs = true;
+ break;
+ case CMPLT_p_p_zi:
+ cond = gt;
+ commute_inputs = true;
+ break;
+ case CMPNE_p_p_zi:
+ cond = ne;
+ break;
+ default:
+ cond = al;
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister src2;
+ dup_immediate(vform,
+ src2,
+ ExtractSignedBitfield64(4, 0, instr->ExtractBits(20, 16)));
+ SVEIntCompareVectorsHelper(cond,
+ vform,
+ ReadPRegister(instr->GetPd()),
+ ReadPRegister(instr->GetPgLow8()),
+ commute_inputs ? src2
+ : ReadVRegister(instr->GetRn()),
+ commute_inputs ? ReadVRegister(instr->GetRn())
+ : src2);
+}
+
+void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) {
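+  // CMPLO and CMPLS are implemented as CMPHI and CMPHS with the operands
+  // commuted.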
+ bool commute_inputs = false;
+ Condition cond;
+ switch (instr->Mask(SVEIntCompareUnsignedImmMask)) {
+ case CMPHI_p_p_zi:
+ cond = hi;
+ break;
+ case CMPHS_p_p_zi:
+ cond = hs;
+ break;
+ case CMPLO_p_p_zi:
+ cond = hi;
+ commute_inputs = true;
+ break;
+ case CMPLS_p_p_zi:
+ cond = hs;
+ commute_inputs = true;
+ break;
+ default:
+ cond = al;
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister src2;
+ dup_immediate(vform, src2, instr->ExtractBits(20, 14));
+ SVEIntCompareVectorsHelper(cond,
+ vform,
+ ReadPRegister(instr->GetPd()),
+ ReadPRegister(instr->GetPgLow8()),
+ commute_inputs ? src2
+ : ReadVRegister(instr->GetRn()),
+ commute_inputs ? ReadVRegister(instr->GetRn())
+ : src2);
+}
+
+void Simulator::VisitSVEIntCompareVectors(const Instruction* instr) {
+ Instr op = instr->Mask(SVEIntCompareVectorsMask);
+ bool is_wide_elements = false;
+ switch (op) {
+ case CMPEQ_p_p_zw:
+ case CMPGE_p_p_zw:
+ case CMPGT_p_p_zw:
+ case CMPHI_p_p_zw:
+ case CMPHS_p_p_zw:
+ case CMPLE_p_p_zw:
+ case CMPLO_p_p_zw:
+ case CMPLS_p_p_zw:
+ case CMPLT_p_p_zw:
+ case CMPNE_p_p_zw:
+ is_wide_elements = true;
+ break;
+ }
+
+ Condition cond;
+ switch (op) {
+ case CMPEQ_p_p_zw:
+ case CMPEQ_p_p_zz:
+ cond = eq;
+ break;
+ case CMPGE_p_p_zw:
+ case CMPGE_p_p_zz:
+ cond = ge;
+ break;
+ case CMPGT_p_p_zw:
+ case CMPGT_p_p_zz:
+ cond = gt;
+ break;
+ case CMPHI_p_p_zw:
+ case CMPHI_p_p_zz:
+ cond = hi;
+ break;
+ case CMPHS_p_p_zw:
+ case CMPHS_p_p_zz:
+ cond = hs;
+ break;
+ case CMPNE_p_p_zw:
+ case CMPNE_p_p_zz:
+ cond = ne;
+ break;
+ case CMPLE_p_p_zw:
+ cond = le;
+ break;
+ case CMPLO_p_p_zw:
+ cond = lo;
+ break;
+ case CMPLS_p_p_zw:
+ cond = ls;
+ break;
+ case CMPLT_p_p_zw:
+ cond = lt;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ cond = al;
+ break;
+ }
+
+ SVEIntCompareVectorsHelper(cond,
+ instr->GetSVEVectorFormat(),
+ ReadPRegister(instr->GetPd()),
+ ReadPRegister(instr->GetPgLow8()),
+ ReadVRegister(instr->GetRn()),
+ ReadVRegister(instr->GetRm()),
+ is_wide_elements);
+}
+
+void Simulator::VisitSVEFPExponentialAccelerator(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+ VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
+ (vform == kFormatVnD));
+
+ switch (instr->Mask(SVEFPExponentialAcceleratorMask)) {
+ case FEXPA_z_z:
+ fexpa(vform, zd, zn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEFPTrigSelectCoefficient(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+
+ VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
+ (vform == kFormatVnD));
+
+ switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) {
+ case FTSSEL_z_zz:
+ ftssel(vform, zd, zn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEConstructivePrefix_Unpredicated(
+ const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+ switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) {
+ case MOVPRFX_z_z:
+ mov(kFormatVnD, zd, zn); // The lane size is arbitrary.
+ // Record the movprfx, so the next ExecuteInstruction() can check it.
+ movprfx_ = instr;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEIntMulAddPredicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+
+ SimVRegister result;
+ switch (instr->Mask(SVEIntMulAddPredicatedMask)) {
+ case MLA_z_p_zzz:
+ mla(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
+ break;
+ case MLS_z_p_zzz:
+ mls(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
+ break;
+ case MAD_z_p_zzz:
+ // 'za' is encoded in 'Rn'.
+ mla(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
+ break;
+ case MSB_z_p_zzz: {
+ // 'za' is encoded in 'Rn'.
+ mls(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zd, ReadPRegister(instr->GetPgLow8()), result);
+}
+
+void Simulator::VisitSVEIntMulAddUnpredicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zda = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+
+ switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) {
+ case SDOT_z_zzz:
+ sdot(vform, zda, zn, zm);
+ break;
+ case UDOT_z_zzz:
+ udot(vform, zda, zn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEMovprfx(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+
+ switch (instr->Mask(SVEMovprfxMask)) {
+ case MOVPRFX_z_p_z:
+ if (instr->ExtractBit(16)) {
+ mov_merging(vform, zd, pg, zn);
+ } else {
+ mov_zeroing(vform, zd, pg, zn);
+ }
+
+ // Record the movprfx, so the next ExecuteInstruction() can check it.
+ movprfx_ = instr;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEIntReduction(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& vd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) {
+ switch (instr->Mask(SVEIntReductionLogicalMask)) {
+ case ANDV_r_p_z:
+ andv(vform, vd, pg, zn);
+ break;
+ case EORV_r_p_z:
+ eorv(vform, vd, pg, zn);
+ break;
+ case ORV_r_p_z:
+ orv(vform, vd, pg, zn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ } else {
+ switch (instr->Mask(SVEIntReductionMask)) {
+ case SADDV_r_p_z:
+ saddv(vform, vd, pg, zn);
+ break;
+ case SMAXV_r_p_z:
+ smaxv(vform, vd, pg, zn);
+ break;
+ case SMINV_r_p_z:
+ sminv(vform, vd, pg, zn);
+ break;
+ case UADDV_r_p_z:
+ uaddv(vform, vd, pg, zn);
+ break;
+ case UMAXV_r_p_z:
+ umaxv(vform, vd, pg, zn);
+ break;
+ case UMINV_r_p_z:
+ uminv(vform, vd, pg, zn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ }
+}
+
+void Simulator::VisitSVEIntUnaryArithmeticPredicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+ SimVRegister result;
+ switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) {
+ case ABS_z_p_z:
+ abs(vform, result, zn);
+ break;
+ case CLS_z_p_z:
+ cls(vform, result, zn);
+ break;
+ case CLZ_z_p_z:
+ clz(vform, result, zn);
+ break;
+ case CNOT_z_p_z:
+ cnot(vform, result, zn);
+ break;
+ case CNT_z_p_z:
+ cnt(vform, result, zn);
+ break;
+ case FABS_z_p_z:
+ fabs_(vform, result, zn);
+ break;
+ case FNEG_z_p_z:
+ fneg(vform, result, zn);
+ break;
+ case NEG_z_p_z:
+ neg(vform, result, zn);
+ break;
+ case NOT_z_p_z:
+ not_(vform, result, zn);
+ break;
+ case SXTB_z_p_z:
+ case SXTH_z_p_z:
+ case SXTW_z_p_z:
+ sxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
+ break;
+ case UXTB_z_p_z:
+ case UXTH_z_p_z:
+ case UXTW_z_p_z:
+ uxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ mov_merging(vform, zd, pg, result);
+}
+
+void Simulator::VisitSVECopyFPImm_Predicated(const Instruction* instr) {
+ // There is only one instruction in this group.
+ VIXL_ASSERT(instr->Mask(SVECopyFPImm_PredicatedMask) == FCPY_z_p_i);
+
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+
+ SimVRegister result;
+ switch (instr->Mask(SVECopyFPImm_PredicatedMask)) {
+ case FCPY_z_p_i: {
+ int imm8 = instr->ExtractBits(12, 5);
+ uint64_t value = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform),
+ Instruction::Imm8ToFP64(imm8));
+ dup_immediate(vform, result, value);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zd, pg, result);
+}
+
+void Simulator::VisitSVEIntAddSubtractImm_Unpredicated(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister scratch;
+
+ uint64_t imm = instr->GetImmSVEIntWideUnsigned();
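+  // Bit 13 optionally shifts the immediate left by eight; for example,
+  // "add z0.h, z0.h, #1, lsl #8" adds 256 to each lane.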
+ imm <<= instr->ExtractBit(13) * 8;
+
+ switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) {
+ case ADD_z_zi:
+ add_uint(vform, zd, zd, imm);
+ break;
+ case SQADD_z_zi:
+ add_uint(vform, zd, zd, imm).SignedSaturate(vform);
+ break;
+ case SQSUB_z_zi:
+ sub_uint(vform, zd, zd, imm).SignedSaturate(vform);
+ break;
+ case SUBR_z_zi:
+ dup_immediate(vform, scratch, imm);
+ sub(vform, zd, scratch, zd);
+ break;
+ case SUB_z_zi:
+ sub_uint(vform, zd, zd, imm);
+ break;
+ case UQADD_z_zi:
+ add_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
+ break;
+ case UQSUB_z_zi:
+ sub_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
+ break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+ }
+}
+
+void Simulator::VisitSVEBroadcastIntImm_Unpredicated(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+
+ VectorFormat format = instr->GetSVEVectorFormat();
+ int64_t imm = instr->GetImmSVEIntWideSigned();
+ int shift = instr->ExtractBit(13) * 8;
+ imm *= 1 << shift;
+
+ switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) {
+ case DUP_z_i:
+ // The encoding of byte-sized lanes with lsl #8 is undefined.
+ if ((format == kFormatVnB) && (shift == 8)) {
+ VIXL_UNIMPLEMENTED();
+ } else {
+ dup_immediate(format, zd, imm);
+ }
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEBroadcastFPImm_Unpredicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+
+ switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) {
+ case FDUP_z_i:
+ switch (vform) {
+ case kFormatVnH:
+ dup_immediate(vform, zd, Float16ToRawbits(instr->GetSVEImmFP16()));
+ break;
+ case kFormatVnS:
+ dup_immediate(vform, zd, FloatToRawbits(instr->GetSVEImmFP32()));
+ break;
+ case kFormatVnD:
+ dup_immediate(vform, zd, DoubleToRawbits(instr->GetSVEImmFP64()));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets(
+ const Instruction* instr) {
+ switch (instr->Mask(
+ SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) {
+ case LD1H_z_p_bz_s_x32_scaled:
+ case LD1SH_z_p_bz_s_x32_scaled:
+ case LDFF1H_z_p_bz_s_x32_scaled:
+ case LDFF1SH_z_p_bz_s_x32_scaled:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
+ SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
+}
+
+void Simulator::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets(
+ const Instruction* instr) {
+ switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) {
+ case LD1B_z_p_bz_s_x32_unscaled:
+ case LD1H_z_p_bz_s_x32_unscaled:
+ case LD1SB_z_p_bz_s_x32_unscaled:
+ case LD1SH_z_p_bz_s_x32_unscaled:
+ case LD1W_z_p_bz_s_x32_unscaled:
+ case LDFF1B_z_p_bz_s_x32_unscaled:
+ case LDFF1H_z_p_bz_s_x32_unscaled:
+ case LDFF1SB_z_p_bz_s_x32_unscaled:
+ case LDFF1SH_z_p_bz_s_x32_unscaled:
+ case LDFF1W_z_p_bz_s_x32_unscaled:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
+ SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
+}
+
+void Simulator::VisitSVE32BitGatherLoad_VectorPlusImm(
+ const Instruction* instr) {
+ switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) {
+ case LD1B_z_p_ai_s:
+ VIXL_UNIMPLEMENTED();
+ break;
+ case LD1H_z_p_ai_s:
+ VIXL_UNIMPLEMENTED();
+ break;
+ case LD1SB_z_p_ai_s:
+ VIXL_UNIMPLEMENTED();
+ break;
+ case LD1SH_z_p_ai_s:
+ VIXL_UNIMPLEMENTED();
+ break;
+ case LD1W_z_p_ai_s:
+ VIXL_UNIMPLEMENTED();
+ break;
+ case LDFF1B_z_p_ai_s:
+ VIXL_UNIMPLEMENTED();
+ break;
+ case LDFF1H_z_p_ai_s:
+ VIXL_UNIMPLEMENTED();
+ break;
+ case LDFF1SB_z_p_ai_s:
+ VIXL_UNIMPLEMENTED();
+ break;
+ case LDFF1SH_z_p_ai_s:
+ VIXL_UNIMPLEMENTED();
+ break;
+ case LDFF1W_z_p_ai_s:
+ VIXL_UNIMPLEMENTED();
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets(
+ const Instruction* instr) {
+ switch (
+ instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) {
+ case LD1W_z_p_bz_s_x32_scaled:
+ case LDFF1W_z_p_bz_s_x32_scaled:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
+ SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
+}
+
+void Simulator::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets(
+ const Instruction* instr) {
+ switch (
+ instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) {
+ // Ignore prefetch hint instructions.
+ case PRFB_i_p_bz_s_x32_scaled:
+ case PRFD_i_p_bz_s_x32_scaled:
+ case PRFH_i_p_bz_s_x32_scaled:
+ case PRFW_i_p_bz_s_x32_scaled:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVE32BitGatherPrefetch_VectorPlusImm(
+ const Instruction* instr) {
+ switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) {
+ // Ignore prefetch hint instructions.
+ case PRFB_i_p_ai_s:
+ case PRFD_i_p_ai_s:
+ case PRFH_i_p_ai_s:
+ case PRFW_i_p_ai_s:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEContiguousPrefetch_ScalarPlusImm(
+ const Instruction* instr) {
+ switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) {
+ // Ignore prefetch hint instructions.
+ case PRFB_i_p_bi_s:
+ case PRFD_i_p_bi_s:
+ case PRFH_i_p_bi_s:
+ case PRFW_i_p_bi_s:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEContiguousPrefetch_ScalarPlusScalar(
+ const Instruction* instr) {
+ switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) {
+ // Ignore prefetch hint instructions.
+ case PRFB_i_p_br_s:
+ case PRFD_i_p_br_s:
+ case PRFH_i_p_br_s:
+ case PRFW_i_p_br_s:
+ if (instr->GetRm() == kZeroRegCode) {
+ VIXL_UNIMPLEMENTED();
+ }
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVELoadAndBroadcastElement(const Instruction* instr) {
+ bool is_signed;
+ switch (instr->Mask(SVELoadAndBroadcastElementMask)) {
+ case LD1RB_z_p_bi_u8:
+ case LD1RB_z_p_bi_u16:
+ case LD1RB_z_p_bi_u32:
+ case LD1RB_z_p_bi_u64:
+ case LD1RH_z_p_bi_u16:
+ case LD1RH_z_p_bi_u32:
+ case LD1RH_z_p_bi_u64:
+ case LD1RW_z_p_bi_u32:
+ case LD1RW_z_p_bi_u64:
+ case LD1RD_z_p_bi_u64:
+ is_signed = false;
+ break;
+ case LD1RSB_z_p_bi_s16:
+ case LD1RSB_z_p_bi_s32:
+ case LD1RSB_z_p_bi_s64:
+ case LD1RSH_z_p_bi_s32:
+ case LD1RSH_z_p_bi_s64:
+ case LD1RSW_z_p_bi_s64:
+ is_signed = true;
+ break;
+ default:
+ // This encoding group is complete, so no other values should be possible.
+ VIXL_UNREACHABLE();
+ is_signed = false;
+ break;
+ }
+
+ int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
+ int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed, 13);
+ VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
+ uint64_t offset = instr->ExtractBits(21, 16) << msize_in_bytes_log2;
+ uint64_t base = ReadXRegister(instr->GetRn()) + offset;
+ VectorFormat unpack_vform =
+ SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
+ SimVRegister temp;
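+  // The element is read once, replicated to every lane of a temporary, and
+  // then copied to zt with inactive lanes zeroed.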
+ ld1r(vform, unpack_vform, temp, base, is_signed);
+ mov_zeroing(vform,
+ ReadVRegister(instr->GetRt()),
+ ReadPRegister(instr->GetPgLow8()),
+ temp);
+}
+
+void Simulator::VisitSVELoadPredicateRegister(const Instruction* instr) {
+ switch (instr->Mask(SVELoadPredicateRegisterMask)) {
+ case LDR_p_bi: {
+ SimPRegister& pt = ReadPRegister(instr->GetPt());
+ int pl = GetPredicateLengthInBytes();
+ int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
+ uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
+ uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl;
+ for (int i = 0; i < pl; i++) {
+ pt.Insert(i, Memory::Read<uint8_t>(address + i));
+ }
+ LogPRead(instr->GetPt(), address);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) {
+ switch (instr->Mask(SVELoadVectorRegisterMask)) {
+ case LDR_z_bi: {
+ SimVRegister& zt = ReadVRegister(instr->GetRt());
+ int vl = GetVectorLengthInBytes();
+ int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
+ uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
+ uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl;
+ for (int i = 0; i < vl; i++) {
+ zt.Insert(i, Memory::Read<uint8_t>(address + i));
+ }
+ LogZRead(instr->GetRt(), address);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
+ const Instruction* instr) {
+ switch (instr->Mask(
+ SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
+ case LD1D_z_p_bz_d_x32_scaled:
+ case LD1H_z_p_bz_d_x32_scaled:
+ case LD1SH_z_p_bz_d_x32_scaled:
+ case LD1SW_z_p_bz_d_x32_scaled:
+ case LD1W_z_p_bz_d_x32_scaled:
+ case LDFF1H_z_p_bz_d_x32_scaled:
+ case LDFF1W_z_p_bz_d_x32_scaled:
+ case LDFF1D_z_p_bz_d_x32_scaled:
+ case LDFF1SH_z_p_bz_d_x32_scaled:
+ case LDFF1SW_z_p_bz_d_x32_scaled:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
+ SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
+}
+
+void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
+ const Instruction* instr) {
+ switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) {
+ case LD1D_z_p_bz_d_64_scaled:
+ case LD1H_z_p_bz_d_64_scaled:
+ case LD1SH_z_p_bz_d_64_scaled:
+ case LD1SW_z_p_bz_d_64_scaled:
+ case LD1W_z_p_bz_d_64_scaled:
+ case LDFF1H_z_p_bz_d_64_scaled:
+ case LDFF1W_z_p_bz_d_64_scaled:
+ case LDFF1D_z_p_bz_d_64_scaled:
+ case LDFF1SH_z_p_bz_d_64_scaled:
+ case LDFF1SW_z_p_bz_d_64_scaled:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, SVE_LSL);
+}
+
+void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
+ const Instruction* instr) {
+ switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) {
+ case LD1B_z_p_bz_d_64_unscaled:
+ case LD1D_z_p_bz_d_64_unscaled:
+ case LD1H_z_p_bz_d_64_unscaled:
+ case LD1SB_z_p_bz_d_64_unscaled:
+ case LD1SH_z_p_bz_d_64_unscaled:
+ case LD1SW_z_p_bz_d_64_unscaled:
+ case LD1W_z_p_bz_d_64_unscaled:
+ case LDFF1B_z_p_bz_d_64_unscaled:
+ case LDFF1D_z_p_bz_d_64_unscaled:
+ case LDFF1H_z_p_bz_d_64_unscaled:
+ case LDFF1SB_z_p_bz_d_64_unscaled:
+ case LDFF1SH_z_p_bz_d_64_unscaled:
+ case LDFF1SW_z_p_bz_d_64_unscaled:
+ case LDFF1W_z_p_bz_d_64_unscaled:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SVEGatherLoadScalarPlusVectorHelper(instr,
+ kFormatVnD,
+ NO_SVE_OFFSET_MODIFIER);
+}
+
+void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets(
+ const Instruction* instr) {
+ switch (instr->Mask(
+ SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
+ case LD1B_z_p_bz_d_x32_unscaled:
+ case LD1D_z_p_bz_d_x32_unscaled:
+ case LD1H_z_p_bz_d_x32_unscaled:
+ case LD1SB_z_p_bz_d_x32_unscaled:
+ case LD1SH_z_p_bz_d_x32_unscaled:
+ case LD1SW_z_p_bz_d_x32_unscaled:
+ case LD1W_z_p_bz_d_x32_unscaled:
+ case LDFF1B_z_p_bz_d_x32_unscaled:
+ case LDFF1H_z_p_bz_d_x32_unscaled:
+ case LDFF1W_z_p_bz_d_x32_unscaled:
+ case LDFF1D_z_p_bz_d_x32_unscaled:
+ case LDFF1SB_z_p_bz_d_x32_unscaled:
+ case LDFF1SH_z_p_bz_d_x32_unscaled:
+ case LDFF1SW_z_p_bz_d_x32_unscaled:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
+ SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
+}
+
+void Simulator::VisitSVE64BitGatherLoad_VectorPlusImm(
+ const Instruction* instr) {
+ switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) {
+ case LD1B_z_p_ai_d:
+ case LD1D_z_p_ai_d:
+ case LD1H_z_p_ai_d:
+ case LD1SB_z_p_ai_d:
+ case LD1SH_z_p_ai_d:
+ case LD1SW_z_p_ai_d:
+ case LD1W_z_p_ai_d:
+ case LDFF1B_z_p_ai_d:
+ case LDFF1D_z_p_ai_d:
+ case LDFF1H_z_p_ai_d:
+ case LDFF1SB_z_p_ai_d:
+ case LDFF1SH_z_p_ai_d:
+ case LDFF1SW_z_p_ai_d:
+ case LDFF1W_z_p_ai_d:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ bool is_signed = instr->ExtractBit(14) == 0;
+ bool is_ff = instr->ExtractBit(13) == 1;
+ // Note that these instructions don't use the Dtype encoding.
+ int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
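+  // The 5-bit immediate counts msize-sized elements, so scale it to a byte
+  // offset; for LD1D (msize = 8 bytes) the offsets range over 0 to 31 * 8.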
+ uint64_t imm = instr->ExtractBits(20, 16) << msize_in_bytes_log2;
+ LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ if (is_ff) {
+ VIXL_UNIMPLEMENTED();
+ } else {
+ SVEStructuredLoadHelper(kFormatVnD,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr,
+ is_signed);
+ }
+}
+
+void Simulator::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets(
+ const Instruction* instr) {
+ switch (
+ instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) {
+ // Ignore prefetch hint instructions.
+ case PRFB_i_p_bz_d_64_scaled:
+ case PRFD_i_p_bz_d_64_scaled:
+ case PRFH_i_p_bz_d_64_scaled:
+ case PRFW_i_p_bz_d_64_scaled:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::
+ VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets(
+ const Instruction* instr) {
+ switch (instr->Mask(
+ SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
+ // Ignore prefetch hint instructions.
+ case PRFB_i_p_bz_d_x32_scaled:
+ case PRFD_i_p_bz_d_x32_scaled:
+ case PRFH_i_p_bz_d_x32_scaled:
+ case PRFW_i_p_bz_d_x32_scaled:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVE64BitGatherPrefetch_VectorPlusImm(
+ const Instruction* instr) {
+ switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) {
+ // Ignore prefetch hint instructions.
+ case PRFB_i_p_ai_d:
+ case PRFD_i_p_ai_d:
+ case PRFH_i_p_ai_d:
+ case PRFW_i_p_ai_d:
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar(
+ const Instruction* instr) {
+ bool is_signed;
+ switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
+ case LDFF1B_z_p_br_u8:
+ case LDFF1B_z_p_br_u16:
+ case LDFF1B_z_p_br_u32:
+ case LDFF1B_z_p_br_u64:
+ case LDFF1H_z_p_br_u16:
+ case LDFF1H_z_p_br_u32:
+ case LDFF1H_z_p_br_u64:
+ case LDFF1W_z_p_br_u32:
+ case LDFF1W_z_p_br_u64:
+ case LDFF1D_z_p_br_u64:
+ is_signed = false;
+ break;
+ case LDFF1SB_z_p_br_s16:
+ case LDFF1SB_z_p_br_s32:
+ case LDFF1SB_z_p_br_s64:
+ case LDFF1SH_z_p_br_s32:
+ case LDFF1SH_z_p_br_s64:
+ case LDFF1SW_z_p_br_s64:
+ is_signed = true;
+ break;
+ default:
+ // This encoding group is complete, so no other values should be possible.
+ VIXL_UNREACHABLE();
+ is_signed = false;
+ break;
+ }
+
+ int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
+ int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
+ VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
+ uint64_t offset = ReadXRegister(instr->GetRm());
+ offset <<= msize_in_bytes_log2;
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEFaultTolerantLoadHelper(vform,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr,
+ kSVEFirstFaultLoad,
+ is_signed);
+}
+
+void Simulator::VisitSVEContiguousNonFaultLoad_ScalarPlusImm(
+ const Instruction* instr) {
+ bool is_signed = false;
+ switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) {
+ case LDNF1B_z_p_bi_u16:
+ case LDNF1B_z_p_bi_u32:
+ case LDNF1B_z_p_bi_u64:
+ case LDNF1B_z_p_bi_u8:
+ case LDNF1D_z_p_bi_u64:
+ case LDNF1H_z_p_bi_u16:
+ case LDNF1H_z_p_bi_u32:
+ case LDNF1H_z_p_bi_u64:
+ case LDNF1W_z_p_bi_u32:
+ case LDNF1W_z_p_bi_u64:
+ break;
+ case LDNF1SB_z_p_bi_s16:
+ case LDNF1SB_z_p_bi_s32:
+ case LDNF1SB_z_p_bi_s64:
+ case LDNF1SH_z_p_bi_s32:
+ case LDNF1SH_z_p_bi_s64:
+ case LDNF1SW_z_p_bi_s64:
+ is_signed = true;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
+ int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
+ VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
+ int vl = GetVectorLengthInBytes();
+ int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
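+  // The signed immediate counts whole transfers. Each transfer covers
+  // VL / (esize / msize) bytes of memory, because the load reads one
+  // msize-sized value per esize-sized lane.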
+ uint64_t offset =
+ (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEFaultTolerantLoadHelper(vform,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr,
+ kSVENonFaultLoad,
+ is_signed);
+}
+
+void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm(
+ const Instruction* instr) {
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ VectorFormat vform = kFormatUndefined;
+
+ switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) {
+ case LDNT1B_z_p_bi_contiguous:
+ vform = kFormatVnB;
+ break;
+ case LDNT1D_z_p_bi_contiguous:
+ vform = kFormatVnD;
+ break;
+ case LDNT1H_z_p_bi_contiguous:
+ vform = kFormatVnH;
+ break;
+ case LDNT1W_z_p_bi_contiguous:
+ vform = kFormatVnS;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
+ int vl = GetVectorLengthInBytes();
+ uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredLoadHelper(vform,
+ pg,
+ instr->GetRt(),
+ addr,
+ /* is_signed = */ false);
+}
+
+void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar(
+ const Instruction* instr) {
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ VectorFormat vform = kFormatUndefined;
+
+ switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) {
+ case LDNT1B_z_p_br_contiguous:
+ vform = kFormatVnB;
+ break;
+ case LDNT1D_z_p_br_contiguous:
+ vform = kFormatVnD;
+ break;
+ case LDNT1H_z_p_br_contiguous:
+ vform = kFormatVnH;
+ break;
+ case LDNT1W_z_p_br_contiguous:
+ vform = kFormatVnS;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
+ uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredLoadHelper(vform,
+ pg,
+ instr->GetRt(),
+ addr,
+ /* is_signed = */ false);
+}
+
+void Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm(
+ const Instruction* instr) {
+ SimVRegister& zt = ReadVRegister(instr->GetRt());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
+ uint64_t offset = instr->ExtractSignedBits(19, 16) * 16;
+
+ VectorFormat vform = kFormatUndefined;
+ switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) {
+ case LD1RQB_z_p_bi_u8:
+ vform = kFormatVnB;
+ break;
+ case LD1RQD_z_p_bi_u64:
+ vform = kFormatVnD;
+ break;
+ case LD1RQH_z_p_bi_u16:
+ vform = kFormatVnH;
+ break;
+ case LD1RQW_z_p_bi_u32:
+ vform = kFormatVnS;
+ break;
+ default:
+ addr = offset = 0;
+ break;
+ }
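+  // Load one quadword, clear the lanes that pg leaves inactive, then
+  // replicate that quadword across the whole vector.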
+ ld1(kFormat16B, zt, addr + offset);
+ mov_zeroing(vform, zt, pg, zt);
+ dup_element(kFormatVnQ, zt, zt, 0);
+}
+
+void Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar(
+ const Instruction* instr) {
+ SimVRegister& zt = ReadVRegister(instr->GetRt());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
+ uint64_t offset = ReadXRegister(instr->GetRm());
+
+ VectorFormat vform = kFormatUndefined;
+ switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) {
+ case LD1RQB_z_p_br_contiguous:
+ vform = kFormatVnB;
+ break;
+ case LD1RQD_z_p_br_contiguous:
+ vform = kFormatVnD;
+ offset <<= 3;
+ break;
+ case LD1RQH_z_p_br_contiguous:
+ vform = kFormatVnH;
+ offset <<= 1;
+ break;
+ case LD1RQW_z_p_br_contiguous:
+ vform = kFormatVnS;
+ offset <<= 2;
+ break;
+ default:
+ addr = offset = 0;
+ break;
+ }
+ ld1(kFormat16B, zt, addr + offset);
+ mov_zeroing(vform, zt, pg, zt);
+ dup_element(kFormatVnQ, zt, zt, 0);
+}
+
+void Simulator::VisitSVELoadMultipleStructures_ScalarPlusImm(
+ const Instruction* instr) {
+ switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) {
+ case LD2B_z_p_bi_contiguous:
+ case LD2D_z_p_bi_contiguous:
+ case LD2H_z_p_bi_contiguous:
+ case LD2W_z_p_bi_contiguous:
+ case LD3B_z_p_bi_contiguous:
+ case LD3D_z_p_bi_contiguous:
+ case LD3H_z_p_bi_contiguous:
+ case LD3W_z_p_bi_contiguous:
+ case LD4B_z_p_bi_contiguous:
+ case LD4D_z_p_bi_contiguous:
+ case LD4H_z_p_bi_contiguous:
+ case LD4W_z_p_bi_contiguous: {
+ int vl = GetVectorLengthInBytes();
+ int msz = instr->ExtractBits(24, 23);
+ int reg_count = instr->ExtractBits(22, 21) + 1;
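+      // The signed immediate is scaled by the size of one full transfer:
+      // reg_count consecutive vectors.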
+ uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
+ LogicSVEAddressVector addr(
+ ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
+ addr.SetMsizeInBytesLog2(msz);
+ addr.SetRegCount(reg_count);
+ SVEStructuredLoadHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVELoadMultipleStructures_ScalarPlusScalar(
+ const Instruction* instr) {
+ switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) {
+ case LD2B_z_p_br_contiguous:
+ case LD2D_z_p_br_contiguous:
+ case LD2H_z_p_br_contiguous:
+ case LD2W_z_p_br_contiguous:
+ case LD3B_z_p_br_contiguous:
+ case LD3D_z_p_br_contiguous:
+ case LD3H_z_p_br_contiguous:
+ case LD3W_z_p_br_contiguous:
+ case LD4B_z_p_br_contiguous:
+ case LD4D_z_p_br_contiguous:
+ case LD4H_z_p_br_contiguous:
+ case LD4W_z_p_br_contiguous: {
+ int msz = instr->ExtractBits(24, 23);
+ uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
+ LogicSVEAddressVector addr(
+ ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
+ addr.SetMsizeInBytesLog2(msz);
+ addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
+ SVEStructuredLoadHelper(vform,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr,
+ false);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets(
+ const Instruction* instr) {
+ switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) {
+ case ST1H_z_p_bz_s_x32_scaled:
+ case ST1W_z_p_bz_s_x32_scaled: {
+ unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
+ VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
+ int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
+ uint64_t base = ReadXRegister(instr->GetRn());
+ SVEOffsetModifier mod =
+ (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
+ LogicSVEAddressVector addr(base,
+ &ReadVRegister(instr->GetRm()),
+ kFormatVnS,
+ mod,
+ scale);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredStoreHelper(kFormatVnS,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets(
+ const Instruction* instr) {
+ switch (
+ instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) {
+ case ST1B_z_p_bz_s_x32_unscaled:
+ case ST1H_z_p_bz_s_x32_unscaled:
+ case ST1W_z_p_bz_s_x32_unscaled: {
+ unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
+ VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
+ uint64_t base = ReadXRegister(instr->GetRn());
+ SVEOffsetModifier mod =
+ (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
+ LogicSVEAddressVector addr(base,
+ &ReadVRegister(instr->GetRm()),
+ kFormatVnS,
+ mod);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredStoreHelper(kFormatVnS,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVE32BitScatterStore_VectorPlusImm(
+ const Instruction* instr) {
+ int msz = 0;
+ switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) {
+ case ST1B_z_p_ai_s:
+ msz = 0;
+ break;
+ case ST1H_z_p_ai_s:
+ msz = 1;
+ break;
+ case ST1W_z_p_ai_s:
+ msz = 2;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ uint64_t imm = instr->ExtractBits(20, 16) << msz;
+ LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnS);
+ addr.SetMsizeInBytesLog2(msz);
+ SVEStructuredStoreHelper(kFormatVnS,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+}
+
+void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets(
+ const Instruction* instr) {
+ switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) {
+ case ST1D_z_p_bz_d_64_scaled:
+ case ST1H_z_p_bz_d_64_scaled:
+ case ST1W_z_p_bz_d_64_scaled: {
+ unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
+ VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
+ int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
+ uint64_t base = ReadXRegister(instr->GetRn());
+ LogicSVEAddressVector addr(base,
+ &ReadVRegister(instr->GetRm()),
+ kFormatVnD,
+ SVE_LSL,
+ scale);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredStoreHelper(kFormatVnD,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets(
+ const Instruction* instr) {
+ switch (
+ instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) {
+ case ST1B_z_p_bz_d_64_unscaled:
+ case ST1D_z_p_bz_d_64_unscaled:
+ case ST1H_z_p_bz_d_64_unscaled:
+ case ST1W_z_p_bz_d_64_unscaled: {
+ unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
+ VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
+ uint64_t base = ReadXRegister(instr->GetRn());
+ LogicSVEAddressVector addr(base,
+ &ReadVRegister(instr->GetRm()),
+ kFormatVnD,
+ NO_SVE_OFFSET_MODIFIER);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredStoreHelper(kFormatVnD,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets(
+ const Instruction* instr) {
+ switch (instr->Mask(
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
+ case ST1D_z_p_bz_d_x32_scaled:
+ case ST1H_z_p_bz_d_x32_scaled:
+ case ST1W_z_p_bz_d_x32_scaled: {
+ unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
+ VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
+ int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
+ uint64_t base = ReadXRegister(instr->GetRn());
+ SVEOffsetModifier mod =
+ (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
+ LogicSVEAddressVector addr(base,
+ &ReadVRegister(instr->GetRm()),
+ kFormatVnD,
+ mod,
+ scale);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredStoreHelper(kFormatVnD,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::
+ VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets(
+ const Instruction* instr) {
+ switch (instr->Mask(
+ SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
+ case ST1B_z_p_bz_d_x32_unscaled:
+ case ST1D_z_p_bz_d_x32_unscaled:
+ case ST1H_z_p_bz_d_x32_unscaled:
+ case ST1W_z_p_bz_d_x32_unscaled: {
+ unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
+ VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
+ uint64_t base = ReadXRegister(instr->GetRn());
+ SVEOffsetModifier mod =
+ (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
+ LogicSVEAddressVector addr(base,
+ &ReadVRegister(instr->GetRm()),
+ kFormatVnD,
+ mod);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredStoreHelper(kFormatVnD,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVE64BitScatterStore_VectorPlusImm(
+ const Instruction* instr) {
+ int msz = 0;
+ switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) {
+ case ST1B_z_p_ai_d:
+ msz = 0;
+ break;
+ case ST1D_z_p_ai_d:
+ msz = 3;
+ break;
+ case ST1H_z_p_ai_d:
+ msz = 1;
+ break;
+ case ST1W_z_p_ai_d:
+ msz = 2;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ uint64_t imm = instr->ExtractBits(20, 16) << msz;
+ LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
+ addr.SetMsizeInBytesLog2(msz);
+ SVEStructuredStoreHelper(kFormatVnD,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+}
+
+void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusImm(
+ const Instruction* instr) {
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ VectorFormat vform = kFormatUndefined;
+
+ switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) {
+ case STNT1B_z_p_bi_contiguous:
+ vform = kFormatVnB;
+ break;
+ case STNT1D_z_p_bi_contiguous:
+ vform = kFormatVnD;
+ break;
+ case STNT1H_z_p_bi_contiguous:
+ vform = kFormatVnH;
+ break;
+ case STNT1W_z_p_bi_contiguous:
+ vform = kFormatVnS;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
+ int vl = GetVectorLengthInBytes();
+ uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
+}
+
+void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar(
+ const Instruction* instr) {
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ VectorFormat vform = kFormatUndefined;
+
+ switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) {
+ case STNT1B_z_p_br_contiguous:
+ vform = kFormatVnB;
+ break;
+ case STNT1D_z_p_br_contiguous:
+ vform = kFormatVnD;
+ break;
+ case STNT1H_z_p_br_contiguous:
+ vform = kFormatVnH;
+ break;
+ case STNT1W_z_p_br_contiguous:
+ vform = kFormatVnS;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
+ uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
+}
+
+void Simulator::VisitSVEContiguousStore_ScalarPlusImm(
+ const Instruction* instr) {
+ switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) {
+ case ST1B_z_p_bi:
+ case ST1D_z_p_bi:
+ case ST1H_z_p_bi:
+ case ST1W_z_p_bi: {
+ int vl = GetVectorLengthInBytes();
+ int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
+ int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(false);
+ VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
+ int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
+ uint64_t offset =
+ (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
+ VectorFormat vform =
+ SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredStoreHelper(vform,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEContiguousStore_ScalarPlusScalar(
+ const Instruction* instr) {
+ switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) {
+ case ST1B_z_p_br:
+ case ST1D_z_p_br:
+ case ST1H_z_p_br:
+ case ST1W_z_p_br: {
+ uint64_t offset = ReadXRegister(instr->GetRm());
+ offset <<= instr->ExtractBits(24, 23);
+ VectorFormat vform =
+ SVEFormatFromLaneSizeInBytesLog2(instr->ExtractBits(22, 21));
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+ addr.SetMsizeInBytesLog2(instr->ExtractBits(24, 23));
+ SVEStructuredStoreHelper(vform,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVECopySIMDFPScalarRegisterToVector_Predicated(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister z_result;
+
+ switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) {
+ case CPY_z_p_v:
+ dup_element(vform, z_result, ReadVRegister(instr->GetRn()), 0);
+ mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusImm(
+ const Instruction* instr) {
+ switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) {
+ case ST2B_z_p_bi_contiguous:
+ case ST2D_z_p_bi_contiguous:
+ case ST2H_z_p_bi_contiguous:
+ case ST2W_z_p_bi_contiguous:
+ case ST3B_z_p_bi_contiguous:
+ case ST3D_z_p_bi_contiguous:
+ case ST3H_z_p_bi_contiguous:
+ case ST3W_z_p_bi_contiguous:
+ case ST4B_z_p_bi_contiguous:
+ case ST4D_z_p_bi_contiguous:
+ case ST4H_z_p_bi_contiguous:
+ case ST4W_z_p_bi_contiguous: {
+ int vl = GetVectorLengthInBytes();
+ int msz = instr->ExtractBits(24, 23);
+ int reg_count = instr->ExtractBits(22, 21) + 1;
+ uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
+ LogicSVEAddressVector addr(
+ ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
+ addr.SetMsizeInBytesLog2(msz);
+ addr.SetRegCount(reg_count);
+ SVEStructuredStoreHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusScalar(
+ const Instruction* instr) {
+ switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) {
+ case ST2B_z_p_br_contiguous:
+ case ST2D_z_p_br_contiguous:
+ case ST2H_z_p_br_contiguous:
+ case ST2W_z_p_br_contiguous:
+ case ST3B_z_p_br_contiguous:
+ case ST3D_z_p_br_contiguous:
+ case ST3H_z_p_br_contiguous:
+ case ST3W_z_p_br_contiguous:
+ case ST4B_z_p_br_contiguous:
+ case ST4D_z_p_br_contiguous:
+ case ST4H_z_p_br_contiguous:
+ case ST4W_z_p_br_contiguous: {
+ int msz = instr->ExtractBits(24, 23);
+ uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
+ LogicSVEAddressVector addr(
+ ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
+ addr.SetMsizeInBytesLog2(msz);
+ addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
+ SVEStructuredStoreHelper(vform,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEStorePredicateRegister(const Instruction* instr) {
+ switch (instr->Mask(SVEStorePredicateRegisterMask)) {
+ case STR_p_bi: {
+ SimPRegister& pt = ReadPRegister(instr->GetPt());
+ int pl = GetPredicateLengthInBytes();
+ int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
+ uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
+ uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl;
+ for (int i = 0; i < pl; i++) {
+ Memory::Write(address + i, pt.GetLane<uint8_t>(i));
+ }
+ LogPWrite(instr->GetPt(), address);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEStoreVectorRegister(const Instruction* instr) {
+ switch (instr->Mask(SVEStoreVectorRegisterMask)) {
+ case STR_z_bi: {
+ SimVRegister& zt = ReadVRegister(instr->GetRt());
+ int vl = GetVectorLengthInBytes();
+ int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
+ uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
+ uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl;
+ for (int i = 0; i < vl; i++) {
+ Memory::Write(address + i, zt.GetLane<uint8_t>(i));
+ }
+ LogZWrite(instr->GetRt(), address);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEMulIndex(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zda = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+
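+  // The doubleword form encodes Zm in four bits with a one-bit index; the
+  // word form encodes Zm in three bits with a two-bit index.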
+ switch (instr->Mask(SVEMulIndexMask)) {
+ case SDOT_z_zzzi_d:
+ sdot(vform,
+ zda,
+ zn,
+ ReadVRegister(instr->ExtractBits(19, 16)),
+ instr->ExtractBit(20));
+ break;
+ case SDOT_z_zzzi_s:
+ sdot(vform,
+ zda,
+ zn,
+ ReadVRegister(instr->ExtractBits(18, 16)),
+ instr->ExtractBits(20, 19));
+ break;
+ case UDOT_z_zzzi_d:
+ udot(vform,
+ zda,
+ zn,
+ ReadVRegister(instr->ExtractBits(19, 16)),
+ instr->ExtractBit(20));
+ break;
+ case UDOT_z_zzzi_s:
+ udot(vform,
+ zda,
+ zn,
+ ReadVRegister(instr->ExtractBits(18, 16)),
+ instr->ExtractBits(20, 19));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEPartitionBreakCondition(const Instruction* instr) {
+ SimPRegister& pd = ReadPRegister(instr->GetPd());
+ SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
+ SimPRegister& pn = ReadPRegister(instr->GetPn());
+ SimPRegister result;
+
+ switch (instr->Mask(SVEPartitionBreakConditionMask)) {
+ case BRKAS_p_p_p_z:
+ case BRKA_p_p_p:
+ brka(result, pg, pn);
+ break;
+ case BRKBS_p_p_p_z:
+ case BRKB_p_p_p:
+ brkb(result, pg, pn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
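+  // Bit 4 selects merging into pd; otherwise the inactive lanes are zeroed.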
+ if (instr->ExtractBit(4) == 1) {
+ mov_merging(pd, pg, result);
+ } else {
+ mov_zeroing(pd, pg, result);
+ }
+
+  // Set the flags if needed.
+ if (instr->ExtractBit(22) == 1) {
+ PredTest(kFormatVnB, pg, pd);
+ }
+}
+
+void Simulator::VisitSVEPropagateBreakToNextPartition(
+ const Instruction* instr) {
+ SimPRegister& pdm = ReadPRegister(instr->GetPd());
+ SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
+ SimPRegister& pn = ReadPRegister(instr->GetPn());
+
+ switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) {
+ case BRKNS_p_p_pp:
+ case BRKN_p_p_pp:
+ brkn(pdm, pg, pn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+  // Set the flags if needed.
+ if (instr->ExtractBit(22) == 1) {
+ // Note that this ignores `pg`.
+ PredTest(kFormatVnB, GetPTrue(), pdm);
+ }
+}
+
+void Simulator::VisitSVEUnpackPredicateElements(const Instruction* instr) {
+ SimPRegister& pd = ReadPRegister(instr->GetPd());
+ SimPRegister& pn = ReadPRegister(instr->GetPn());
+
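+  // Model the unpack by expanding the predicate into a vector, interleaving
+  // it with zero so that each source element maps to a double-width
+  // destination element, and packing the result back into predicate form.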
+ SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
+ SimVRegister zero;
+ dup_immediate(kFormatVnB, zero, 0);
+
+ switch (instr->Mask(SVEUnpackPredicateElementsMask)) {
+ case PUNPKHI_p_p:
+ zip2(kFormatVnB, temp, temp, zero);
+ break;
+ case PUNPKLO_p_p:
+ zip1(kFormatVnB, temp, temp, zero);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
+}
+
+void Simulator::VisitSVEPermutePredicateElements(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimPRegister& pd = ReadPRegister(instr->GetPd());
+ SimPRegister& pn = ReadPRegister(instr->GetPn());
+ SimPRegister& pm = ReadPRegister(instr->GetPm());
+
+ SimVRegister temp0 = Simulator::ExpandToSimVRegister(pn);
+ SimVRegister temp1 = Simulator::ExpandToSimVRegister(pm);
+
+ switch (instr->Mask(SVEPermutePredicateElementsMask)) {
+ case TRN1_p_pp:
+ trn1(vform, temp0, temp0, temp1);
+ break;
+ case TRN2_p_pp:
+ trn2(vform, temp0, temp0, temp1);
+ break;
+ case UZP1_p_pp:
+ uzp1(vform, temp0, temp0, temp1);
+ break;
+ case UZP2_p_pp:
+ uzp2(vform, temp0, temp0, temp1);
+ break;
+ case ZIP1_p_pp:
+ zip1(vform, temp0, temp0, temp1);
+ break;
+ case ZIP2_p_pp:
+ zip2(vform, temp0, temp0, temp1);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp0);
+}
+
+void Simulator::VisitSVEReversePredicateElements(const Instruction* instr) {
+ switch (instr->Mask(SVEReversePredicateElementsMask)) {
+ case REV_p_p: {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimPRegister& pn = ReadPRegister(instr->GetPn());
+ SimPRegister& pd = ReadPRegister(instr->GetPd());
+ SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
+ rev(vform, temp, temp);
+ Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEPermuteVectorExtract(const Instruction* instr) {
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ // Second source register "Zm" is encoded where "Zn" would usually be.
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+
+ const int imm8h_mask = 0x001F0000;
+ const int imm8l_mask = 0x00001C00;
+ int index = instr->ExtractBits<imm8h_mask | imm8l_mask>();
+ int vl = GetVectorLengthInBytes();
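+  // An out-of-range index (>= VL) leaves the first source unchanged; this is
+  // modelled by clamping the index to zero, since ext() with a zero index
+  // returns the first source.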
+ index = (index >= vl) ? 0 : index;
+
+ switch (instr->Mask(SVEPermuteVectorExtractMask)) {
+ case EXT_z_zi_des:
+ ext(kFormatVnB, zdn, zdn, zm, index);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEPermuteVectorInterleaving(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+
+ switch (instr->Mask(SVEPermuteVectorInterleavingMask)) {
+ case TRN1_z_zz:
+ trn1(vform, zd, zn, zm);
+ break;
+ case TRN2_z_zz:
+ trn2(vform, zd, zn, zm);
+ break;
+ case UZP1_z_zz:
+ uzp1(vform, zd, zn, zm);
+ break;
+ case UZP2_z_zz:
+ uzp2(vform, zd, zn, zm);
+ break;
+ case ZIP1_z_zz:
+ zip1(vform, zd, zn, zm);
+ break;
+ case ZIP2_z_zz:
+ zip2(vform, zd, zn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEConditionallyBroadcastElementToVector(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ int active_offset = -1;
+ switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) {
+ case CLASTA_z_p_zz:
+ active_offset = 1;
+ break;
+ case CLASTB_z_p_zz:
+ active_offset = 0;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ if (active_offset >= 0) {
+ std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
+ if (value.first) {
+ dup_immediate(vform, zdn, value.second);
+ } else {
+ // Trigger a line of trace for the operation, even though it doesn't
+ // change the register value.
+ mov(vform, zdn, zdn);
+ }
+ }
+}
+
+void Simulator::VisitSVEConditionallyExtractElementToSIMDFPScalar(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& vdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ int active_offset = -1;
+ switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) {
+ case CLASTA_v_p_z:
+ active_offset = 1;
+ break;
+ case CLASTB_v_p_z:
+ active_offset = 0;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ if (active_offset >= 0) {
+ LogicVRegister dst(vdn);
+ uint64_t src1_value = dst.Uint(vform, 0);
+ std::pair<bool, uint64_t> src2_value = clast(vform, pg, zm, active_offset);
+ dup_immediate(vform, vdn, 0);
+ dst.SetUint(vform, 0, src2_value.first ? src2_value.second : src1_value);
+ }
+}
+
+void Simulator::VisitSVEConditionallyExtractElementToGeneralRegister(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ int active_offset = -1;
+ switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) {
+ case CLASTA_r_p_z:
+ active_offset = 1;
+ break;
+ case CLASTB_r_p_z:
+ active_offset = 0;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ if (active_offset >= 0) {
+ std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
+ uint64_t masked_src = ReadXRegister(instr->GetRd()) &
+ GetUintMask(LaneSizeInBitsFromFormat(vform));
+ WriteXRegister(instr->GetRd(), value.first ? value.second : masked_src);
+ }
+}
+
+void Simulator::VisitSVEExtractElementToSIMDFPScalarRegister(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& vdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ int active_offset = -1;
+ switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) {
+ case LASTA_v_p_z:
+ active_offset = 1;
+ break;
+ case LASTB_v_p_z:
+ active_offset = 0;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ if (active_offset >= 0) {
+ LogicVRegister dst(vdn);
+ std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
+ dup_immediate(vform, vdn, 0);
+ dst.SetUint(vform, 0, value.second);
+ }
+}
+
+void Simulator::VisitSVEExtractElementToGeneralRegister(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ int active_offset = -1;
+ switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) {
+ case LASTA_r_p_z:
+ active_offset = 1;
+ break;
+ case LASTB_r_p_z:
+ active_offset = 0;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ if (active_offset >= 0) {
+ std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
+ WriteXRegister(instr->GetRd(), value.second);
+ }
+}
+
+void Simulator::VisitSVECompressActiveElements(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ switch (instr->Mask(SVECompressActiveElementsMask)) {
+ case COMPACT_z_p_z:
+ compact(vform, zd, pg, zn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVECopyGeneralRegisterToVector_Predicated(
+ const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister z_result;
+
+ switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) {
+ case CPY_z_p_r:
+ dup_immediate(vform,
+ z_result,
+ ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
+ mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVECopyIntImm_Predicated(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+
+ SimVRegister result;
+ switch (instr->Mask(SVECopyIntImm_PredicatedMask)) {
+ case CPY_z_p_i: {
+ // Use unsigned arithmetic to avoid undefined behaviour during the shift.
+ uint64_t imm8 = instr->GetImmSVEIntWideSigned();
+ dup_immediate(vform, result, imm8 << (instr->ExtractBit(13) * 8));
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
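+  // Bit 14 selects the merging form; otherwise inactive lanes are zeroed.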
+ if (instr->ExtractBit(14) != 0) {
+ mov_merging(vform, zd, pg, result);
+ } else {
+ mov_zeroing(vform, zd, pg, result);
+ }
+}
+
+void Simulator::VisitSVEReverseWithinElements(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister result;
+
+  // In NEON, the chunk size in which elements are REVersed is given by the
+  // instruction mnemonic, and the element size by the register suffix. SVE
+  // swaps these semantics; the mapping to the logic functions below accounts
+  // for this.
+ VectorFormat chunk_form = instr->GetSVEVectorFormat();
+ VectorFormat element_form = kFormatUndefined;
+
+ switch (instr->Mask(SVEReverseWithinElementsMask)) {
+ case RBIT_z_p_z:
+ rbit(chunk_form, result, zn);
+ break;
+ case REVB_z_z:
+ VIXL_ASSERT((chunk_form == kFormatVnH) || (chunk_form == kFormatVnS) ||
+ (chunk_form == kFormatVnD));
+ element_form = kFormatVnB;
+ break;
+ case REVH_z_z:
+ VIXL_ASSERT((chunk_form == kFormatVnS) || (chunk_form == kFormatVnD));
+ element_form = kFormatVnH;
+ break;
+ case REVW_z_z:
+ VIXL_ASSERT(chunk_form == kFormatVnD);
+ element_form = kFormatVnS;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
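+  // For example, REVB on S-sized chunks byte-reverses each 32-bit chunk,
+  // which is NEON's rev32 applied to byte-sized elements.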
+ if (instr->Mask(SVEReverseWithinElementsMask) != RBIT_z_p_z) {
+ VIXL_ASSERT(element_form != kFormatUndefined);
+ switch (chunk_form) {
+ case kFormatVnH:
+ rev16(element_form, result, zn);
+ break;
+ case kFormatVnS:
+ rev32(element_form, result, zn);
+ break;
+ case kFormatVnD:
+ rev64(element_form, result, zn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+
+ mov_merging(chunk_form, zd, pg, result);
+}
+
+void Simulator::VisitSVEVectorSplice_Destructive(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+ switch (instr->Mask(SVEVectorSplice_DestructiveMask)) {
+ case SPLICE_z_p_zz_des:
+ splice(vform, zdn, pg, zdn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEBroadcastGeneralRegister(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) {
+ case DUP_z_r:
+ dup_immediate(instr->GetSVEVectorFormat(),
+ zd,
+ ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEInsertSIMDFPScalarRegister(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) {
+ case INSR_z_v:
+ insr(vform, zd, ReadDRegisterBits(instr->GetRn()));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEInsertGeneralRegister(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ switch (instr->Mask(SVEInsertGeneralRegisterMask)) {
+ case INSR_z_r:
+ insr(vform, zd, ReadXRegister(instr->GetRn()));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEBroadcastIndexElement(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ switch (instr->Mask(SVEBroadcastIndexElementMask)) {
+ case DUP_z_zi: {
+ std::pair<int, int> index_and_lane_size =
+ instr->GetSVEPermuteIndexAndLaneSizeLog2();
+ int index = index_and_lane_size.first;
+ int lane_size_in_bytes_log_2 = index_and_lane_size.second;
+ VectorFormat vform =
+ SVEFormatFromLaneSizeInBytesLog2(lane_size_in_bytes_log_2);
+ if ((index < 0) || (index >= LaneCountFromFormat(vform))) {
+ // Out of bounds, set the destination register to zero.
+ dup_immediate(kFormatVnD, zd, 0);
+ } else {
+ dup_element(vform, zd, ReadVRegister(instr->GetRn()), index);
+ }
+ return;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEReverseVectorElements(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ switch (instr->Mask(SVEReverseVectorElementsMask)) {
+ case REV_z_z:
+ rev(vform, zd, ReadVRegister(instr->GetRn()));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEUnpackVectorElements(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ switch (instr->Mask(SVEUnpackVectorElementsMask)) {
+ case SUNPKHI_z_z:
+ unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kSignedExtend);
+ break;
+ case SUNPKLO_z_z:
+ unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kSignedExtend);
+ break;
+ case UUNPKHI_z_z:
+ unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kUnsignedExtend);
+ break;
+ case UUNPKLO_z_z:
+ unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kUnsignedExtend);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVETableLookup(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ switch (instr->Mask(SVETableLookupMask)) {
+ case TBL_z_zz_1:
+ Table(instr->GetSVEVectorFormat(),
+ zd,
+ ReadVRegister(instr->GetRn()),
+ ReadVRegister(instr->GetRm()));
+ return;
+ default:
+ break;
+ }
+}
+
+void Simulator::VisitSVEPredicateCount(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
+ SimPRegister& pn = ReadPRegister(instr->GetPn());
+
+ switch (instr->Mask(SVEPredicateCountMask)) {
+ case CNTP_r_p_p: {
+ WriteXRegister(instr->GetRd(), CountActiveAndTrueLanes(vform, pg, pn));
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEPredicateLogical(const Instruction* instr) {
+ Instr op = instr->Mask(SVEPredicateLogicalMask);
+ SimPRegister& pd = ReadPRegister(instr->GetPd());
+ SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
+ SimPRegister& pn = ReadPRegister(instr->GetPn());
+ SimPRegister& pm = ReadPRegister(instr->GetPm());
+ SimPRegister result;
+ switch (op) {
+ case ANDS_p_p_pp_z:
+ case AND_p_p_pp_z:
+ case BICS_p_p_pp_z:
+ case BIC_p_p_pp_z:
+ case EORS_p_p_pp_z:
+ case EOR_p_p_pp_z:
+ case NANDS_p_p_pp_z:
+ case NAND_p_p_pp_z:
+ case NORS_p_p_pp_z:
+ case NOR_p_p_pp_z:
+ case ORNS_p_p_pp_z:
+ case ORN_p_p_pp_z:
+ case ORRS_p_p_pp_z:
+ case ORR_p_p_pp_z:
+ SVEPredicateLogicalHelper(static_cast<SVEPredicateLogicalOp>(op),
+ result,
+ pn,
+ pm);
+ break;
+ case SEL_p_p_pp:
+ sel(pd, pg, pn, pm);
+ return;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
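+  // All forms other than SEL produce a zeroing result: inactive lanes of pd
+  // are cleared.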
+ mov_zeroing(pd, pg, result);
+ if (instr->Mask(SVEPredicateLogicalSetFlagsBit) != 0) {
+ PredTest(kFormatVnB, pg, pd);
+ }
+}
+
+void Simulator::VisitSVEPredicateFirstActive(const Instruction* instr) {
+ LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
+ LogicPRegister pdn = ReadPRegister(instr->GetPd());
+ switch (instr->Mask(SVEPredicateFirstActiveMask)) {
+ case PFIRST_p_p_p:
+ pfirst(pdn, pg, pdn);
+ // TODO: Is this broken when pg == pdn?
+ PredTest(kFormatVnB, pg, pdn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEPredicateInitialize(const Instruction* instr) {
+ // This group only contains PTRUE{S}, and there are no unallocated encodings.
+ VIXL_STATIC_ASSERT(
+ SVEPredicateInitializeMask ==
+ (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit));
+ VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) ||
+ (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s));
+
+ LogicPRegister pdn = ReadPRegister(instr->GetPd());
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ ptrue(vform, pdn, instr->GetImmSVEPredicateConstraint());
+ if (instr->ExtractBit(16)) PredTest(vform, pdn, pdn);
+}
+
+void Simulator::VisitSVEPredicateNextActive(const Instruction* instr) {
+ // This group only contains PNEXT, and there are no unallocated encodings.
+ VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask);
+ VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p);
+
+ LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
+ LogicPRegister pdn = ReadPRegister(instr->GetPd());
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ pnext(vform, pdn, pg, pdn);
+ // TODO: Is this broken when pg == pdn?
+ PredTest(vform, pg, pdn);
+}
+
+void Simulator::VisitSVEPredicateReadFromFFR_Predicated(
+ const Instruction* instr) {
+ LogicPRegister pd(ReadPRegister(instr->GetPd()));
+ LogicPRegister pg(ReadPRegister(instr->GetPn()));
+ FlagsUpdate flags = LeaveFlags;
+ switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) {
+ case RDFFR_p_p_f:
+ // Do nothing.
+ break;
+ case RDFFRS_p_p_f:
+ flags = SetFlags;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ LogicPRegister ffr(ReadFFR());
+ mov_zeroing(pd, pg, ffr);
+
+ if (flags == SetFlags) {
+ PredTest(kFormatVnB, pg, pd);
+ }
+}
+
+void Simulator::VisitSVEPredicateReadFromFFR_Unpredicated(
+ const Instruction* instr) {
+ LogicPRegister pd(ReadPRegister(instr->GetPd()));
+ LogicPRegister ffr(ReadFFR());
+ switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) {
+ case RDFFR_p_f:
+ mov(pd, ffr);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEPredicateTest(const Instruction* instr) {
+ switch (instr->Mask(SVEPredicateTestMask)) {
+ case PTEST_p_p:
+ PredTest(kFormatVnB,
+ ReadPRegister(instr->ExtractBits(13, 10)),
+ ReadPRegister(instr->GetPn()));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEPredicateZero(const Instruction* instr) {
+ switch (instr->Mask(SVEPredicateZeroMask)) {
+ case PFALSE_p:
+ pfalse(ReadPRegister(instr->GetPd()));
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEPropagateBreak(const Instruction* instr) {
+ SimPRegister& pd = ReadPRegister(instr->GetPd());
+ SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
+ SimPRegister& pn = ReadPRegister(instr->GetPn());
+ SimPRegister& pm = ReadPRegister(instr->GetPm());
+
+ bool set_flags = false;
+ switch (instr->Mask(SVEPropagateBreakMask)) {
+ case BRKPAS_p_p_pp:
+ set_flags = true;
+ VIXL_FALLTHROUGH();
+ case BRKPA_p_p_pp:
+ brkpa(pd, pg, pn, pm);
+ break;
+ case BRKPBS_p_p_pp:
+ set_flags = true;
+ VIXL_FALLTHROUGH();
+ case BRKPB_p_p_pp:
+ brkpb(pd, pg, pn, pm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ if (set_flags) {
+ PredTest(kFormatVnB, pg, pd);
+ }
+}
+
+void Simulator::VisitSVEStackFrameAdjustment(const Instruction* instr) {
+ uint64_t length = 0;
+ switch (instr->Mask(SVEStackFrameAdjustmentMask)) {
+ case ADDPL_r_ri:
+ length = GetPredicateLengthInBytes();
+ break;
+ case ADDVL_r_ri:
+ length = GetVectorLengthInBytes();
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
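+  // The result is the base register (or sp) plus the signed multiplier times
+  // VL (ADDVL) or PL = VL / 8 (ADDPL), in bytes.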
+ uint64_t base = ReadXRegister(instr->GetRm(), Reg31IsStackPointer);
+ WriteXRegister(instr->GetRd(),
+ base + (length * instr->GetImmSVEVLScale()),
+ LogRegWrites,
+ Reg31IsStackPointer);
+}
+
+void Simulator::VisitSVEStackFrameSize(const Instruction* instr) {
+ int64_t scale = instr->GetImmSVEVLScale();
+
+ switch (instr->Mask(SVEStackFrameSizeMask)) {
+ case RDVL_r_i:
+ WriteXRegister(instr->GetRd(), GetVectorLengthInBytes() * scale);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+void Simulator::VisitSVEVectorSelect(const Instruction* instr) {
+ // The only instruction in this group is `sel`, and there are no unused
+ // encodings.
+ VIXL_ASSERT(instr->Mask(SVEVectorSelectMask) == SEL_z_p_zz);
+
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+
+ sel(vform, zd, pg, zn, zm);
+}
+
+void Simulator::VisitSVEFFRInitialise(const Instruction* instr) {
+ switch (instr->Mask(SVEFFRInitialiseMask)) {
+ case SETFFR_f: {
+ LogicPRegister ffr(ReadFFR());
+ ffr.SetAllBits();
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEFFRWriteFromPredicate(const Instruction* instr) {
+ switch (instr->Mask(SVEFFRWriteFromPredicateMask)) {
+ case WRFFR_f_p: {
+ SimPRegister pn(ReadPRegister(instr->GetPn()));
+ bool last_active = true;
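+      // The FFR may only hold a monotonic value: a run of active lanes
+      // followed only by inactive ones. Detect any active lane that follows
+      // an inactive lane.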
+ for (unsigned i = 0; i < pn.GetSizeInBits(); i++) {
+ bool active = pn.GetBit(i);
+ if (active && !last_active) {
+ // `pn` is non-monotonic. This is UNPREDICTABLE.
+ VIXL_ABORT();
+ }
+ last_active = active;
+ }
+ mov(ReadFFR(), pn);
+ break;
+ }
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
+void Simulator::VisitSVEContiguousLoad_ScalarPlusImm(const Instruction* instr) {
+ bool is_signed;
+ switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) {
+ case LD1B_z_p_bi_u8:
+ case LD1B_z_p_bi_u16:
+ case LD1B_z_p_bi_u32:
+ case LD1B_z_p_bi_u64:
+ case LD1H_z_p_bi_u16:
+ case LD1H_z_p_bi_u32:
+ case LD1H_z_p_bi_u64:
+ case LD1W_z_p_bi_u32:
+ case LD1W_z_p_bi_u64:
+ case LD1D_z_p_bi_u64:
+ is_signed = false;
+ break;
+ case LD1SB_z_p_bi_s16:
+ case LD1SB_z_p_bi_s32:
+ case LD1SB_z_p_bi_s64:
+ case LD1SH_z_p_bi_s32:
+ case LD1SH_z_p_bi_s64:
+ case LD1SW_z_p_bi_s64:
+ is_signed = true;
+ break;
+ default:
+ // This encoding group is complete, so no other values should be possible.
+ VIXL_UNREACHABLE();
+ is_signed = false;
+ break;
+ }
+
+ int vl = GetVectorLengthInBytes();
+ int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
+ int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
+ VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
+ int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
+ uint64_t offset =
+ (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredLoadHelper(vform,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr,
+ is_signed);
+}
+
+void Simulator::VisitSVEContiguousLoad_ScalarPlusScalar(
+ const Instruction* instr) {
+ bool is_signed;
+ switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
+ case LD1B_z_p_br_u8:
+ case LD1B_z_p_br_u16:
+ case LD1B_z_p_br_u32:
+ case LD1B_z_p_br_u64:
+ case LD1H_z_p_br_u16:
+ case LD1H_z_p_br_u32:
+ case LD1H_z_p_br_u64:
+ case LD1W_z_p_br_u32:
+ case LD1W_z_p_br_u64:
+ case LD1D_z_p_br_u64:
+ is_signed = false;
+ break;
+ case LD1SB_z_p_br_s16:
+ case LD1SB_z_p_br_s32:
+ case LD1SB_z_p_br_s64:
+ case LD1SH_z_p_br_s32:
+ case LD1SH_z_p_br_s64:
+ case LD1SW_z_p_br_s64:
+ is_signed = true;
+ break;
+ default:
+ // This encoding group is complete, so no other values should be possible.
+ VIXL_UNREACHABLE();
+ is_signed = false;
+ break;
+ }
+
+ int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
+ int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
+ VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
+ uint64_t offset = ReadXRegister(instr->GetRm());
+ offset <<= msize_in_bytes_log2;
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
+ addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
+ SVEStructuredLoadHelper(vform,
+ ReadPRegister(instr->GetPgLow8()),
+ instr->GetRt(),
+ addr,
+ is_signed);
+}
void Simulator::DoUnreachable(const Instruction* instr) {
VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 7cb7419a..1a89dff7 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -37,7 +37,6 @@
#include "cpu-features-auditor-aarch64.h"
#include "disasm-aarch64.h"
#include "instructions-aarch64.h"
-#include "instrument-aarch64.h"
#include "simulator-constants-aarch64.h"
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
@@ -77,6 +76,22 @@ class Memory {
return value;
}
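+  // Read an unsigned value of `size_in_bytes` bytes and zero-extend it to 64
+  // bits.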
+ template <typename A>
+ static uint64_t Read(int size_in_bytes, A address) {
+ switch (size_in_bytes) {
+ case 1:
+ return Read<uint8_t>(address);
+ case 2:
+ return Read<uint16_t>(address);
+ case 4:
+ return Read<uint32_t>(address);
+ case 8:
+ return Read<uint64_t>(address);
+ }
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+
template <typename T, typename A>
static void Write(A address, T value) {
address = AddressUntag(address);
@@ -87,19 +102,33 @@ class Memory {
}
};
-// Represent a register (r0-r31, v0-v31).
-template <int kSizeInBytes>
+// Represent a register (r0-r31, v0-v31, z0-z31, p0-p15).
+template <unsigned kMaxSizeInBits>
class SimRegisterBase {
public:
- SimRegisterBase() : written_since_last_log_(false) {}
+ static const unsigned kMaxSizeInBytes = kMaxSizeInBits / kBitsPerByte;
+ VIXL_STATIC_ASSERT((kMaxSizeInBytes * kBitsPerByte) == kMaxSizeInBits);
+
+ SimRegisterBase() : size_in_bytes_(kMaxSizeInBytes) { Clear(); }
+
+ unsigned GetSizeInBits() const { return size_in_bytes_ * kBitsPerByte; }
+ unsigned GetSizeInBytes() const { return size_in_bytes_; }
+
+ void SetSizeInBytes(unsigned size_in_bytes) {
+ VIXL_ASSERT(size_in_bytes <= kMaxSizeInBytes);
+ size_in_bytes_ = size_in_bytes;
+ }
+ void SetSizeInBits(unsigned size_in_bits) {
+ VIXL_ASSERT(size_in_bits <= kMaxSizeInBits);
+ VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0);
+ SetSizeInBytes(size_in_bits / kBitsPerByte);
+ }
// Write the specified value. The value is zero-extended if necessary.
template <typename T>
void Write(T new_value) {
- if (sizeof(new_value) < kSizeInBytes) {
- // All AArch64 registers are zero-extending.
- memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value));
- }
+ // All AArch64 registers are zero-extending.
+ if (sizeof(new_value) < GetSizeInBytes()) Clear();
WriteLane(new_value, 0);
NotifyRegisterWrite();
}
@@ -108,6 +137,11 @@ class SimRegisterBase {
Write(new_value);
}
+ void Clear() {
+ memset(value_, 0, kMaxSizeInBytes);
+ NotifyRegisterWrite();
+ }
+
// Insert a typed value into a register, leaving the rest of the register
// unchanged. The lane parameter indicates where in the register the value
// should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where
@@ -137,6 +171,17 @@ class SimRegisterBase {
return GetLane(lane);
}
+ // Get the value of a specific bit, indexed from the least-significant bit of
+ // lane 0.
+ bool GetBit(int bit) const {
+ int bit_in_byte = bit % (sizeof(value_[0]) * kBitsPerByte);
+ int byte = bit / (sizeof(value_[0]) * kBitsPerByte);
+ return ((value_[byte] >> bit_in_byte) & 1) != 0;
+ }
+
+ // Return a pointer to the raw, underlying byte array.
+ const uint8_t* GetBytes() const { return value_; }
+
// TODO: Make this return a map of updated bytes, so that we can highlight
// updated lanes for load-and-insert. (That never happens for scalar code, but
// NEON has some instructions that can update individual lanes.)
@@ -145,7 +190,9 @@ class SimRegisterBase {
void NotifyRegisterLogged() { written_since_last_log_ = false; }
protected:
- uint8_t value_[kSizeInBytes];
+ uint8_t value_[kMaxSizeInBytes];
+
+ unsigned size_in_bytes_;
// Helpers to aid with register tracing.
bool written_since_last_log_;
@@ -156,38 +203,152 @@ class SimRegisterBase {
template <typename T>
void ReadLane(T* dst, int lane) const {
VIXL_ASSERT(lane >= 0);
- VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= kSizeInBytes);
+ VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= GetSizeInBytes());
memcpy(dst, &value_[lane * sizeof(*dst)], sizeof(*dst));
}
template <typename T>
void WriteLane(T src, int lane) {
VIXL_ASSERT(lane >= 0);
- VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= kSizeInBytes);
+ VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= GetSizeInBytes());
memcpy(&value_[lane * sizeof(src)], &src, sizeof(src));
}
+
+  // The default ReadLane and WriteLane methods copy values with memcpy, and
+  // so assume that the type being copied is trivially copyable. SimFloat16
+  // is not, so provide alternative implementations for it.
+
+ void ReadLane(vixl::internal::SimFloat16* dst, int lane) const {
+ uint16_t rawbits;
+ ReadLane(&rawbits, lane);
+ *dst = RawbitsToFloat16(rawbits);
+ }
+
+ void WriteLane(vixl::internal::SimFloat16 src, int lane) {
+ WriteLane(Float16ToRawbits(src), lane);
+ }
+};
+
+typedef SimRegisterBase<kXRegSize> SimRegister; // r0-r31
+typedef SimRegisterBase<kPRegMaxSize> SimPRegister; // p0-p15
+// FFR has the same format as a predicate register.
+typedef SimPRegister SimFFRRegister;
+
+// v0-v31 and z0-z31
+class SimVRegister : public SimRegisterBase<kZRegMaxSize> {
+ public:
+ SimVRegister() : SimRegisterBase<kZRegMaxSize>(), accessed_as_z_(false) {}
+
+ void NotifyAccessAsZ() { accessed_as_z_ = true; }
+
+ void NotifyRegisterLogged() {
+ SimRegisterBase<kZRegMaxSize>::NotifyRegisterLogged();
+ accessed_as_z_ = false;
+ }
+
+ bool AccessedAsZSinceLastLog() const { return accessed_as_z_; }
+
+ private:
+ bool accessed_as_z_;
+};
+
+// Representation of a SVE predicate register.
+class LogicPRegister {
+ public:
+ inline LogicPRegister(
+ SimPRegister& other) // NOLINT(runtime/references)(runtime/explicit)
+ : register_(other) {}
+
+  // Use a conveniently-sized 16-bit chunk: the minimum predicate length is
+  // 16 bits, and larger predicate lengths are always multiples of 16 bits.
+ typedef uint16_t ChunkType;
+
+  // Assign a bit at the lowest-order position of the specified lane's field.
+  // The bit is zero-extended if necessary.
+ void SetActive(VectorFormat vform, int lane_index, bool value) {
+ int psize = LaneSizeInBytesFromFormat(vform);
+ int bit_index = lane_index * psize;
+ int byte_index = bit_index / kBitsPerByte;
+ int bit_offset = bit_index % kBitsPerByte;
+ uint8_t byte = register_.GetLane<uint8_t>(byte_index);
+ register_.Insert(byte_index, ZeroExtend(byte, bit_offset, psize, value));
+ }
+
+ bool IsActive(VectorFormat vform, int lane_index) const {
+ int psize = LaneSizeInBytesFromFormat(vform);
+ int bit_index = lane_index * psize;
+ int byte_index = bit_index / kBitsPerByte;
+ int bit_offset = bit_index % kBitsPerByte;
+ uint8_t byte = register_.GetLane<uint8_t>(byte_index);
+ return ExtractBit(byte, bit_offset);
+ }
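
The predicate accessors above place one bit per byte of the corresponding Z lane, so a lane's bit lives at bit `lane_index * LaneSizeInBytes` of the P register. A small standalone sketch of that index arithmetic; the lane size is chosen for illustration.

#include <cassert>

// Sketch: locate the predicate bit for a given lane, as SetActive/IsActive do.
int main() {
  const int kBitsPerByte = 8;
  // For an SVE vector of 32-bit (S) lanes, each lane's predicate field is
  // 4 bits wide (one bit per byte of the lane).
  int lane_size_in_bytes = 4;
  int lane_index = 5;

  int bit_index = lane_index * lane_size_in_bytes;  // 20
  int byte_index = bit_index / kBitsPerByte;        // byte 2 of the P register
  int bit_offset = bit_index % kBitsPerByte;        // bit 4 of that byte

  assert(byte_index == 2);
  assert(bit_offset == 4);
  return 0;
}
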
+
+ // The accessors for bulk processing.
+ int GetChunkCount() const {
+ VIXL_ASSERT((register_.GetSizeInBytes() % sizeof(ChunkType)) == 0);
+ return register_.GetSizeInBytes() / sizeof(ChunkType);
+ }
+
+ ChunkType GetChunk(int lane) const { return GetActiveMask<ChunkType>(lane); }
+
+ void SetChunk(int lane, ChunkType new_value) {
+ SetActiveMask(lane, new_value);
+ }
+
+ void SetAllBits() {
+ int chunk_size = sizeof(ChunkType) * kBitsPerByte;
+ ChunkType bits = GetUintMask(chunk_size);
+ for (int lane = 0;
+ lane < (static_cast<int>(register_.GetSizeInBits() / chunk_size));
+ lane++) {
+ SetChunk(lane, bits);
+ }
+ }
+
+ template <typename T>
+ T GetActiveMask(int lane) const {
+ return register_.GetLane<T>(lane);
+ }
+
+ template <typename T>
+ void SetActiveMask(int lane, T new_value) {
+ register_.Insert<T>(lane, new_value);
+ }
+
+ void Clear() { register_.Clear(); }
+
+ bool Aliases(const LogicPRegister& other) const {
+ return &register_ == &other.register_;
+ }
+
+ private:
+  // The assigned bit is zero-extended to fill the predicate element.
+ uint8_t ZeroExtend(uint8_t byte, int index, int psize, bool value) {
+ VIXL_ASSERT(index >= 0);
+ VIXL_ASSERT(index + psize <= kBitsPerByte);
+ int bits = value ? 1 : 0;
+ switch (psize) {
+ case 1:
+ AssignBit(byte, index, bits);
+ break;
+ case 2:
+ AssignBits(byte, index, 0x03, bits);
+ break;
+ case 4:
+ AssignBits(byte, index, 0x0f, bits);
+ break;
+ case 8:
+ AssignBits(byte, index, 0xff, bits);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+ return byte;
+ }
+
+ SimPRegister& register_;
};
-typedef SimRegisterBase<kXRegSizeInBytes> SimRegister; // r0-r31
-typedef SimRegisterBase<kQRegSizeInBytes> SimVRegister; // v0-v31
-
-// The default ReadLane and WriteLane methods assume what we are copying is
-// "trivially copyable" by using memcpy. We have to provide alternative
-// implementations for SimFloat16 which cannot be copied this way.
-
-template <>
-template <>
-inline void SimVRegister::ReadLane(vixl::internal::SimFloat16* dst,
- int lane) const {
- uint16_t rawbits;
- ReadLane(&rawbits, lane);
- *dst = RawbitsToFloat16(rawbits);
-}
-
-template <>
-template <>
-inline void SimVRegister::WriteLane(vixl::internal::SimFloat16 src, int lane) {
- WriteLane(Float16ToRawbits(src), lane);
-}
// Representation of a vector register, with typed getters and setters for lanes
// and additional information to represent lane state.
@@ -205,6 +366,7 @@ class LogicVRegister {
}
int64_t Int(VectorFormat vform, int index) const {
+ if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
int64_t element;
switch (LaneSizeInBitsFromFormat(vform)) {
case 8:
@@ -227,6 +389,7 @@ class LogicVRegister {
}
uint64_t Uint(VectorFormat vform, int index) const {
+ if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
uint64_t element;
switch (LaneSizeInBitsFromFormat(vform)) {
case 8:
@@ -260,6 +423,7 @@ class LogicVRegister {
}
void SetInt(VectorFormat vform, int index, int64_t value) const {
+ if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
switch (LaneSizeInBitsFromFormat(vform)) {
case 8:
register_.Insert(index, static_cast<int8_t>(value));
@@ -287,6 +451,7 @@ class LogicVRegister {
}
void SetUint(VectorFormat vform, int index, uint64_t value) const {
+ if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
switch (LaneSizeInBitsFromFormat(vform)) {
case 8:
register_.Insert(index, static_cast<uint8_t>(value));
@@ -313,7 +478,98 @@ class LogicVRegister {
}
}
+ void ReadIntFromMem(VectorFormat vform,
+ unsigned msize_in_bits,
+ int index,
+ uint64_t addr) const {
+ if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
+ int64_t value;
+ switch (msize_in_bits) {
+ case 8:
+ value = Memory::Read<int8_t>(addr);
+ break;
+ case 16:
+ value = Memory::Read<int16_t>(addr);
+ break;
+ case 32:
+ value = Memory::Read<int32_t>(addr);
+ break;
+ case 64:
+ value = Memory::Read<int64_t>(addr);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ return;
+ }
+
+ unsigned esize_in_bits = LaneSizeInBitsFromFormat(vform);
+ VIXL_ASSERT(esize_in_bits >= msize_in_bits);
+ switch (esize_in_bits) {
+ case 8:
+ register_.Insert(index, static_cast<int8_t>(value));
+ break;
+ case 16:
+ register_.Insert(index, static_cast<int16_t>(value));
+ break;
+ case 32:
+ register_.Insert(index, static_cast<int32_t>(value));
+ break;
+ case 64:
+ register_.Insert(index, static_cast<int64_t>(value));
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ return;
+ }
+ }
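
ReadIntFromMem narrows the memory access to `msize` and then sign-extends the loaded value into an `esize`-sized lane. A self-contained sketch of that widening step, with the memory read replaced by a local variable.

#include <cassert>
#include <cstdint>

// Sketch: sign-extend an 8-bit memory value into a 32-bit lane, as
// ReadIntFromMem does for msize = 8 and esize = 32.
int main() {
  int8_t in_memory = -0x70;                  // Byte 0x90, read as signed.
  int64_t value = in_memory;                 // Sign-extend on load.
  uint32_t lane = static_cast<uint32_t>(static_cast<int32_t>(value));
  assert(lane == 0xffffff90);                // Upper lane bits copy the sign.
  return 0;
}
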
+
+ void ReadUintFromMem(VectorFormat vform,
+ unsigned msize_in_bits,
+ int index,
+ uint64_t addr) const {
+ if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
+ uint64_t value;
+ switch (msize_in_bits) {
+ case 8:
+ value = Memory::Read<uint8_t>(addr);
+ break;
+ case 16:
+ value = Memory::Read<uint16_t>(addr);
+ break;
+ case 32:
+ value = Memory::Read<uint32_t>(addr);
+ break;
+ case 64:
+ value = Memory::Read<uint64_t>(addr);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ return;
+ }
+
+ unsigned esize_in_bits = LaneSizeInBitsFromFormat(vform);
+ VIXL_ASSERT(esize_in_bits >= msize_in_bits);
+ switch (esize_in_bits) {
+ case 8:
+ register_.Insert(index, static_cast<uint8_t>(value));
+ break;
+ case 16:
+ register_.Insert(index, static_cast<uint16_t>(value));
+ break;
+ case 32:
+ register_.Insert(index, static_cast<uint32_t>(value));
+ break;
+ case 64:
+ register_.Insert(index, static_cast<uint64_t>(value));
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ return;
+ }
+ }
+
void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const {
+ if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
switch (LaneSizeInBitsFromFormat(vform)) {
case 8:
register_.Insert(index, Memory::Read<uint8_t>(addr));
@@ -334,6 +590,7 @@ class LogicVRegister {
}
void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const {
+ if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
uint64_t value = Uint(vform, index);
switch (LaneSizeInBitsFromFormat(vform)) {
case 8:
@@ -361,11 +618,20 @@ class LogicVRegister {
register_.Insert(index, value);
}
- // When setting a result in a register of size less than Q, the top bits of
- // the Q register must be cleared.
+ template <typename T>
+ void SetFloat(VectorFormat vform, int index, T value) const {
+ if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
+ register_.Insert(index, value);
+ }
+
+ // When setting a result in a register larger than the result itself, the top
+ // bits of the register must be cleared.
void ClearForWrite(VectorFormat vform) const {
+ // SVE destinations write whole registers, so we have nothing to clear.
+ if (IsSVEFormat(vform)) return;
+
unsigned size = RegisterSizeInBytesFromFormat(vform);
- for (unsigned i = size; i < kQRegSizeInBytes; i++) {
+ for (unsigned i = size; i < register_.GetSizeInBytes(); i++) {
SetUint(kFormat16B, i, 0);
}
}
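
For NEON formats, ClearForWrite zeroes every byte above the written result; SVE formats return early because they always write the whole Z register. A standalone sketch of the clearing loop over a Q-sized (16-byte) backing store.

#include <cassert>
#include <cstdint>
#include <cstring>

// Sketch: clear the bytes above a D-sized (8-byte) result in a Q-sized store.
int main() {
  const unsigned kQRegSizeInBytes = 16;
  uint8_t reg[kQRegSizeInBytes];
  memset(reg, 0xab, sizeof(reg));  // Stale data from a previous instruction.

  unsigned result_size = 8;        // e.g. a result written as 8B, 2S or 1D.
  for (unsigned i = result_size; i < kQRegSizeInBytes; i++) {
    reg[i] = 0;
  }

  assert(reg[7] == 0xab);          // Result bytes untouched...
  assert(reg[8] == 0);             // ...upper bytes cleared.
  return 0;
}
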
@@ -481,15 +747,129 @@ class LogicVRegister {
return *this;
}
+ int LaneCountFromFormat(VectorFormat vform) const {
+ if (IsSVEFormat(vform)) {
+ return register_.GetSizeInBits() / LaneSizeInBitsFromFormat(vform);
+ } else {
+ return vixl::aarch64::LaneCountFromFormat(vform);
+ }
+ }
+
private:
SimVRegister& register_;
// Allocate one saturation state entry per lane; largest register is type Q,
// and lanes can be a minimum of one byte wide.
- Saturation saturated_[kQRegSizeInBytes];
+ Saturation saturated_[kZRegMaxSizeInBytes];
// Allocate one rounding state entry per lane.
- bool round_[kQRegSizeInBytes];
+ bool round_[kZRegMaxSizeInBytes];
+};
+
+// Represent an SVE addressing mode and abstract per-lane address generation to
+// make iteration easy.
+//
+// Contiguous accesses are described with a simple base address, the memory
+// occupied by each lane (`SetMsizeInBytesLog2()`) and the number of elements in
+// each struct (`SetRegCount()`).
+//
+// Scatter-gather accesses also require a SimVRegister and information about how
+// to extract lanes from it.
+class LogicSVEAddressVector {
+ public:
+ // scalar-plus-scalar
+ // scalar-plus-immediate
+ explicit LogicSVEAddressVector(uint64_t base)
+ : base_(base),
+ msize_in_bytes_log2_(kUnknownMsizeInBytesLog2),
+ reg_count_(1),
+ vector_(NULL),
+ vector_form_(kFormatUndefined),
+ vector_mod_(NO_SVE_OFFSET_MODIFIER),
+ vector_shift_(0) {}
+
+ // scalar-plus-vector
+ // vector-plus-immediate
+ // `base` should be the constant used for each element. That is, the value
+ // of `xn`, or `#<imm>`.
+ // `vector` should be the SimVRegister with offsets for each element. The
+ // vector format must be specified; SVE scatter/gather accesses typically
+ // support both 32-bit and 64-bit addressing.
+ //
+ // `mod` and `shift` correspond to the modifiers applied to each element in
+ // scalar-plus-vector forms, such as those used for unpacking and
+ // sign-extension. They are not used for vector-plus-immediate.
+ LogicSVEAddressVector(uint64_t base,
+ const SimVRegister* vector,
+ VectorFormat vform,
+ SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER,
+ int shift = 0)
+ : base_(base),
+ msize_in_bytes_log2_(kUnknownMsizeInBytesLog2),
+ reg_count_(1),
+ vector_(vector),
+ vector_form_(vform),
+ vector_mod_(mod),
+ vector_shift_(shift) {}
+
+ // Set `msize` -- the memory occupied by each lane -- for address
+ // calculations.
+ void SetMsizeInBytesLog2(int msize_in_bytes_log2) {
+ VIXL_ASSERT(msize_in_bytes_log2 >= static_cast<int>(kBRegSizeInBytesLog2));
+ VIXL_ASSERT(msize_in_bytes_log2 <= static_cast<int>(kDRegSizeInBytesLog2));
+ msize_in_bytes_log2_ = msize_in_bytes_log2;
+ }
+
+ bool HasMsize() const {
+ return msize_in_bytes_log2_ != kUnknownMsizeInBytesLog2;
+ }
+
+ int GetMsizeInBytesLog2() const {
+ VIXL_ASSERT(HasMsize());
+ return msize_in_bytes_log2_;
+ }
+ int GetMsizeInBitsLog2() const {
+ return GetMsizeInBytesLog2() + kBitsPerByteLog2;
+ }
+
+ int GetMsizeInBytes() const { return 1 << GetMsizeInBytesLog2(); }
+ int GetMsizeInBits() const { return 1 << GetMsizeInBitsLog2(); }
+
+ void SetRegCount(int reg_count) {
+ VIXL_ASSERT(reg_count >= 1); // E.g. ld1/st1
+ VIXL_ASSERT(reg_count <= 4); // E.g. ld4/st4
+ reg_count_ = reg_count;
+ }
+
+ int GetRegCount() const { return reg_count_; }
+
+ // Full per-element address calculation for structured accesses.
+ //
+ // Note that the register number argument (`reg`) is zero-based.
+ uint64_t GetElementAddress(int lane, int reg) const {
+ VIXL_ASSERT(reg < GetRegCount());
+ // Individual structures are always contiguous in memory, so this
+ // implementation works for both contiguous and scatter-gather addressing.
+ return GetStructAddress(lane) + (reg * GetMsizeInBytes());
+ }
+
+ // Full per-struct address calculation for structured accesses.
+ uint64_t GetStructAddress(int lane) const;
+
+ bool IsContiguous() const { return vector_ == NULL; }
+ bool IsScatterGather() const { return !IsContiguous(); }
+
+ private:
+ uint64_t base_;
+ int msize_in_bytes_log2_;
+ int reg_count_;
+
+ const SimVRegister* vector_;
+ VectorFormat vector_form_;
+ SVEOffsetModifier vector_mod_;
+ int vector_shift_;
+
+ static const int kUnknownMsizeInBytesLog2 = -1;
};
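
GetStructAddress() is only declared here; its comments imply that contiguous accesses place whole structures back-to-back from `base`. Under that assumption, a standalone sketch of the per-element address arithmetic for a three-register (ld3-style) access with 32-bit memory elements.

#include <cassert>
#include <cstdint>

// Sketch: per-element addresses for a contiguous structured access, assuming
// struct addresses follow base + lane * (reg_count * msize).
int main() {
  uint64_t base = 0x1000;
  int reg_count = 3;       // An ld3-style access.
  int msize_in_bytes = 4;  // 32-bit memory elements.

  int lane = 2;
  int reg = 1;
  uint64_t struct_address = base + lane * (reg_count * msize_in_bytes);
  uint64_t element_address = struct_address + reg * msize_in_bytes;

  assert(struct_address == 0x1018);   // 0x1000 + 2 * 12
  assert(element_address == 0x101c);  // 0x1018 + 1 * 4
  return 0;
}
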
// The proper way to initialize a simulated system register (such as NZCV) is as
@@ -733,6 +1113,11 @@ class Simulator : public DecoderVisitor {
VIXL_ASSERT(IsWordAligned(pc_));
pc_modified_ = false;
+ if (movprfx_ != NULL) {
+ VIXL_CHECK(pc_->CanTakeSVEMovprfx(movprfx_));
+ movprfx_ = NULL;
+ }
+
// On guarded pages, if BType is not zero, take an exception on any
// instruction other than BTI, PACI[AB]SP, HLT or BRK.
if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
@@ -774,13 +1159,6 @@ class Simulator : public DecoderVisitor {
#undef DECLARE
-#define DECLARE(A) \
- VIXL_NO_RETURN_IN_DEBUG_MODE virtual void Visit##A(const Instruction* instr) \
- VIXL_OVERRIDE;
- VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(DECLARE)
-#undef DECLARE
-
-
// Integer register accessors.
// Basic accessor: Read the register as the specified type.
@@ -827,6 +1205,13 @@ class Simulator : public DecoderVisitor {
return ReadXRegister(code, r31mode);
}
+ SimPRegister& ReadPRegister(unsigned code) {
+ VIXL_ASSERT(code < kNumberOfPRegisters);
+ return pregisters_[code];
+ }
+
+ SimFFRRegister& ReadFFR() { return ffr_register_; }
+
// As above, with parameterized size and return type. The value is
// either zero-extended or truncated to fit, as required.
template <typename T>
@@ -877,6 +1262,10 @@ class Simulator : public DecoderVisitor {
// Write 'value' into an integer register. The value is zero-extended. This
// behaviour matches AArch64 register writes.
+ //
+ // SP may be specified in one of two ways:
+ // - (code == kSPRegInternalCode) && (r31mode == Reg31IsZeroRegister)
+ // - (code == 31) && (r31mode == Reg31IsStackPointer)
template <typename T>
void WriteRegister(unsigned code,
T value,
@@ -896,20 +1285,25 @@ class Simulator : public DecoderVisitor {
VIXL_ASSERT((sizeof(T) == kWRegSizeInBytes) ||
(sizeof(T) == kXRegSizeInBytes));
VIXL_ASSERT(
- code < kNumberOfRegisters ||
+ (code < kNumberOfRegisters) ||
((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)));
- if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
- return;
- }
-
- if ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)) {
- code = 31;
+ if (code == 31) {
+ if (r31mode == Reg31IsZeroRegister) {
+ // Discard writes to the zero register.
+ return;
+ } else {
+ code = kSPRegInternalCode;
+ }
}
- registers_[code].Write(value);
+ // registers_[31] is the stack pointer.
+ VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
+ registers_[code % kNumberOfRegisters].Write(value);
- if (log_mode == LogRegWrites) LogRegister(code, r31mode);
+ if (log_mode == LogRegWrites) {
+ LogRegister(code, GetPrintRegisterFormatForSize(sizeof(T)));
+ }
}
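
The rewritten WriteRegister folds both SP spellings onto the same storage slot: the static assert pins `kSPRegInternalCode % kNumberOfRegisters` to 31. A tiny sketch of that index arithmetic; the constant values 63 and 32 are illustrative, only the modulo relationship is guaranteed by the assert.

#include <cassert>

// Sketch: both SP encodings index backing slot 31.
int main() {
  const unsigned kNumberOfRegisters = 32;  // Illustrative value.
  const unsigned kSPRegInternalCode = 63;  // Illustrative value.

  // (code == 31, Reg31IsStackPointer) is rewritten to kSPRegInternalCode,
  // and both spellings then land on registers_[31].
  unsigned code = kSPRegInternalCode;
  assert((kSPRegInternalCode % kNumberOfRegisters) == 31);
  assert((code % kNumberOfRegisters) == 31);
  return 0;
}
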
template <typename T>
VIXL_DEPRECATED("WriteRegister",
@@ -1015,6 +1409,11 @@ class Simulator : public DecoderVisitor {
uint8_t val[kQRegSizeInBytes];
};
+ // A structure for representing a SVE Z register.
+ struct zreg_t {
+ uint8_t val[kZRegMaxSizeInBytes];
+ };
+
// Basic accessor: read the register as the specified type.
template <typename T>
T ReadVRegister(unsigned code) const {
@@ -1130,7 +1529,8 @@ class Simulator : public DecoderVisitor {
(sizeof(value) == kHRegSizeInBytes) ||
(sizeof(value) == kSRegSizeInBytes) ||
(sizeof(value) == kDRegSizeInBytes) ||
- (sizeof(value) == kQRegSizeInBytes));
+ (sizeof(value) == kQRegSizeInBytes) ||
+ (sizeof(value) == kZRegMaxSizeInBytes));
VIXL_ASSERT(code < kNumberOfVRegisters);
vregisters_[code].Write(value);
@@ -1237,6 +1637,12 @@ class Simulator : public DecoderVisitor {
WriteQRegister(code, value, log_mode);
}
+ void WriteZRegister(unsigned code,
+ zreg_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ WriteVRegister(code, value, log_mode);
+ }
+
template <typename T>
T ReadRegister(Register reg) const {
return ReadRegister<T>(reg.GetCode(), Reg31IsZeroRegister);
@@ -1357,14 +1763,16 @@ class Simulator : public DecoderVisitor {
kPrintRegLaneSizeD = 3 << 0,
kPrintRegLaneSizeX = kPrintRegLaneSizeD,
kPrintRegLaneSizeQ = 4 << 0,
+ kPrintRegLaneSizeUnknown = 5 << 0,
kPrintRegLaneSizeOffset = 0,
kPrintRegLaneSizeMask = 7 << 0,
- // The lane count.
+ // The overall register size.
kPrintRegAsScalar = 0,
kPrintRegAsDVector = 1 << 3,
kPrintRegAsQVector = 2 << 3,
+ kPrintRegAsSVEVector = 3 << 3,
kPrintRegAsVectorMask = 3 << 3,
@@ -1372,37 +1780,98 @@ class Simulator : public DecoderVisitor {
// S-, H-, and D-sized lanes.)
kPrintRegAsFP = 1 << 5,
- // Supported combinations.
-
- kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar,
- kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar,
- kPrintHReg = kPrintRegLaneSizeH | kPrintRegAsScalar | kPrintRegAsFP,
- kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
- kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
-
- kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar,
- kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector,
- kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector,
- kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar,
- kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector,
- kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector,
- kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar,
- kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector,
- kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector,
- kPrintReg1HFP = kPrintRegLaneSizeH | kPrintRegAsScalar | kPrintRegAsFP,
- kPrintReg4HFP = kPrintRegLaneSizeH | kPrintRegAsDVector | kPrintRegAsFP,
- kPrintReg8HFP = kPrintRegLaneSizeH | kPrintRegAsQVector | kPrintRegAsFP,
- kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
- kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP,
- kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP,
- kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar,
- kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector,
- kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
- kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | kPrintRegAsFP,
- kPrintReg1Q = kPrintRegLaneSizeQ | kPrintRegAsScalar
+ // With this flag, print helpers won't check that the upper bits are zero.
+ // This also forces the register name to be printed with the `reg<msb:0>`
+ // format.
+ //
+ // The flag is supported with any PrintRegisterFormat other than those with
+ // kPrintRegAsSVEVector.
+ kPrintRegPartial = 1 << 6,
+
+// Supported combinations.
+// These exist so that they can be referred to by name, but also because
+// combining the above flags with `|` does not itself yield a value of the
+// enum type, so the combinations we want to use must be enumerated explicitly.
+
+// Scalar formats.
+#define VIXL_DECL_PRINT_REG_SCALAR(size) \
+ kPrint##size##Reg = kPrintRegLaneSize##size | kPrintRegAsScalar, \
+ kPrint##size##RegPartial = kPrintRegLaneSize##size | kPrintRegPartial
+#define VIXL_DECL_PRINT_REG_SCALAR_FP(size) \
+ VIXL_DECL_PRINT_REG_SCALAR(size) \
+ , kPrint##size##RegFP = kPrint##size##Reg | kPrintRegAsFP, \
+ kPrint##size##RegPartialFP = kPrint##size##RegPartial | kPrintRegAsFP
+ VIXL_DECL_PRINT_REG_SCALAR(W),
+ VIXL_DECL_PRINT_REG_SCALAR(X),
+ VIXL_DECL_PRINT_REG_SCALAR_FP(H),
+ VIXL_DECL_PRINT_REG_SCALAR_FP(S),
+ VIXL_DECL_PRINT_REG_SCALAR_FP(D),
+ VIXL_DECL_PRINT_REG_SCALAR(Q),
+#undef VIXL_DECL_PRINT_REG_SCALAR
+#undef VIXL_DECL_PRINT_REG_SCALAR_FP
+
+#define VIXL_DECL_PRINT_REG_NEON(count, type, size) \
+ kPrintReg##count##type = kPrintRegLaneSize##type | kPrintRegAs##size, \
+ kPrintReg##count##type##Partial = kPrintReg##count##type | kPrintRegPartial
+#define VIXL_DECL_PRINT_REG_NEON_FP(count, type, size) \
+ VIXL_DECL_PRINT_REG_NEON(count, type, size) \
+ , kPrintReg##count##type##FP = kPrintReg##count##type | kPrintRegAsFP, \
+ kPrintReg##count##type##PartialFP = \
+ kPrintReg##count##type##Partial | kPrintRegAsFP
+ VIXL_DECL_PRINT_REG_NEON(1, B, Scalar),
+ VIXL_DECL_PRINT_REG_NEON(8, B, DVector),
+ VIXL_DECL_PRINT_REG_NEON(16, B, QVector),
+ VIXL_DECL_PRINT_REG_NEON_FP(1, H, Scalar),
+ VIXL_DECL_PRINT_REG_NEON_FP(4, H, DVector),
+ VIXL_DECL_PRINT_REG_NEON_FP(8, H, QVector),
+ VIXL_DECL_PRINT_REG_NEON_FP(1, S, Scalar),
+ VIXL_DECL_PRINT_REG_NEON_FP(2, S, DVector),
+ VIXL_DECL_PRINT_REG_NEON_FP(4, S, QVector),
+ VIXL_DECL_PRINT_REG_NEON_FP(1, D, Scalar),
+ VIXL_DECL_PRINT_REG_NEON_FP(2, D, QVector),
+ VIXL_DECL_PRINT_REG_NEON(1, Q, Scalar),
+#undef VIXL_DECL_PRINT_REG_NEON
+#undef VIXL_DECL_PRINT_REG_NEON_FP
+
+#define VIXL_DECL_PRINT_REG_SVE(type) \
+ kPrintRegVn##type = kPrintRegLaneSize##type | kPrintRegAsSVEVector, \
+ kPrintRegVn##type##Partial = kPrintRegVn##type | kPrintRegPartial
+#define VIXL_DECL_PRINT_REG_SVE_FP(type) \
+ VIXL_DECL_PRINT_REG_SVE(type) \
+ , kPrintRegVn##type##FP = kPrintRegVn##type | kPrintRegAsFP, \
+ kPrintRegVn##type##PartialFP = kPrintRegVn##type##Partial | kPrintRegAsFP
+ VIXL_DECL_PRINT_REG_SVE(B),
+ VIXL_DECL_PRINT_REG_SVE_FP(H),
+ VIXL_DECL_PRINT_REG_SVE_FP(S),
+ VIXL_DECL_PRINT_REG_SVE_FP(D),
+ VIXL_DECL_PRINT_REG_SVE(Q)
+#undef VIXL_DECL_PRINT_REG_SVE
+#undef VIXL_DECL_PRINT_REG_SVE_FP
};
+ // Return `format` with the kPrintRegPartial flag set.
+ PrintRegisterFormat GetPrintRegPartial(PrintRegisterFormat format) {
+ // Every PrintRegisterFormat has a kPrintRegPartial counterpart, so the
+ // result of this cast will always be well-defined.
+ return static_cast<PrintRegisterFormat>(format | kPrintRegPartial);
+ }
+
+ // For SVE formats, return the format of a Q register part of it.
+ PrintRegisterFormat GetPrintRegAsQChunkOfSVE(PrintRegisterFormat format) {
+ VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
+ // Keep the FP and lane size fields.
+ int q_format = format & (kPrintRegLaneSizeMask | kPrintRegAsFP);
+ // The resulting format must always be partial, because we're not formatting
+ // the whole Z register.
+ q_format |= (kPrintRegAsQVector | kPrintRegPartial);
+
+ // This cast is always safe because NEON QVector formats support every
+ // combination of FP and lane size that SVE formats do.
+ return static_cast<PrintRegisterFormat>(q_format);
+ }
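
GetPrintRegAsQChunkOfSVE keeps only the lane-size and FP fields and then forces the Q-vector and partial flags. A standalone sketch that mirrors the bit fields declared in the enum above and applies the same transformation.

#include <cassert>

// Sketch: convert an SVE print format into its partial Q-chunk counterpart.
int main() {
  const int kLaneSizeS = 2 << 0;
  const int kLaneSizeMask = 7 << 0;
  const int kAsQVector = 2 << 3;
  const int kAsSVEVector = 3 << 3;
  const int kAsFP = 1 << 5;
  const int kPartial = 1 << 6;

  int sve_format = kLaneSizeS | kAsSVEVector | kAsFP;   // kPrintRegVnSFP
  int q_format = sve_format & (kLaneSizeMask | kAsFP);  // Keep lane size + FP.
  q_format |= (kAsQVector | kPartial);                  // Force Q and partial.

  // The result is the "4S, partial, FP" NEON format.
  assert(q_format == (kLaneSizeS | kAsQVector | kAsFP | kPartial));
  return 0;
}
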
+
unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) {
+ VIXL_ASSERT((format & kPrintRegLaneSizeMask) != kPrintRegLaneSizeUnknown);
return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset;
}
@@ -1411,17 +1880,51 @@ class Simulator : public DecoderVisitor {
}
unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) {
- if (format & kPrintRegAsDVector) return kDRegSizeInBytesLog2;
- if (format & kPrintRegAsQVector) return kQRegSizeInBytesLog2;
-
- // Scalar types.
- return GetPrintRegLaneSizeInBytesLog2(format);
+ switch (format & kPrintRegAsVectorMask) {
+ case kPrintRegAsScalar:
+ return GetPrintRegLaneSizeInBytesLog2(format);
+ case kPrintRegAsDVector:
+ return kDRegSizeInBytesLog2;
+ case kPrintRegAsQVector:
+ return kQRegSizeInBytesLog2;
+ default:
+ case kPrintRegAsSVEVector:
+ // We print SVE vectors in Q-sized chunks. These need special handling,
+ // and it's probably an error to call this function in that case.
+ VIXL_UNREACHABLE();
+ return kQRegSizeInBytesLog2;
+ }
}
unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) {
return 1 << GetPrintRegSizeInBytesLog2(format);
}
+ unsigned GetPrintRegSizeInBitsLog2(PrintRegisterFormat format) {
+ return GetPrintRegSizeInBytesLog2(format) + kBitsPerByteLog2;
+ }
+
+ unsigned GetPrintRegSizeInBits(PrintRegisterFormat format) {
+ return 1 << GetPrintRegSizeInBitsLog2(format);
+ }
+
+ const char* GetPartialRegSuffix(PrintRegisterFormat format) {
+ switch (GetPrintRegSizeInBitsLog2(format)) {
+ case kBRegSizeLog2:
+ return "<7:0>";
+ case kHRegSizeLog2:
+ return "<15:0>";
+ case kSRegSizeLog2:
+ return "<31:0>";
+ case kDRegSizeLog2:
+ return "<63:0>";
+ case kQRegSizeLog2:
+ return "<127:0>";
+ }
+ VIXL_UNREACHABLE();
+ return "<UNKNOWN>";
+ }
+
unsigned GetPrintRegLaneCount(PrintRegisterFormat format) {
unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format);
unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format);
@@ -1429,6 +1932,21 @@ class Simulator : public DecoderVisitor {
return 1 << (reg_size_log2 - lane_size_log2);
}
+ uint16_t GetPrintRegLaneMask(PrintRegisterFormat format) {
+ int print_as = format & kPrintRegAsVectorMask;
+ if (print_as == kPrintRegAsScalar) return 1;
+
+ // Vector formats, including SVE formats printed in Q-sized chunks.
+ static const uint16_t masks[] = {0xffff, 0x5555, 0x1111, 0x0101, 0x0001};
+ unsigned size_in_bytes_log2 = GetPrintRegLaneSizeInBytesLog2(format);
+ VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(masks));
+ uint16_t mask = masks[size_in_bytes_log2];
+
+ // Exclude lanes that aren't visible in D vectors.
+ if (print_as == kPrintRegAsDVector) mask &= 0x00ff;
+ return mask;
+ }
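
The mask table encodes one bit per byte of each lane (0xffff for B lanes, 0x5555 for H, 0x1111 for S, 0x0101 for D, 0x0001 for Q), with D-vector formats further restricted to the low eight bytes. A short worked sketch.

#include <cassert>
#include <cstdint>

// Sketch: reproduce GetPrintRegLaneMask for two example formats.
int main() {
  static const uint16_t masks[] = {0xffff, 0x5555, 0x1111, 0x0101, 0x0001};

  // Q vector of S lanes: lane size log2 is 2, one bit per 4-byte lane.
  uint16_t mask_4s = masks[2];
  assert(mask_4s == 0x1111);

  // D vector of H lanes: start from the H mask, then drop the upper 8 bytes.
  uint16_t mask_4h = masks[1] & 0x00ff;
  assert(mask_4h == 0x0055);
  return 0;
}
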
+
PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size,
unsigned lane_size);
@@ -1459,6 +1977,10 @@ class Simulator : public DecoderVisitor {
return format;
}
+ PrintRegisterFormat GetPrintRegisterFormatForSizeTryFP(unsigned size) {
+ return GetPrintRegisterFormatTryFP(GetPrintRegisterFormatForSize(size));
+ }
+
template <typename T>
PrintRegisterFormat GetPrintRegisterFormat(T value) {
return GetPrintRegisterFormatForSize(sizeof(value));
@@ -1485,99 +2007,314 @@ class Simulator : public DecoderVisitor {
// Print all registers of the specified types.
void PrintRegisters();
void PrintVRegisters();
+ void PrintZRegisters();
void PrintSystemRegisters();
// As above, but only print the registers that have been updated.
void PrintWrittenRegisters();
void PrintWrittenVRegisters();
+ void PrintWrittenPRegisters();
// As above, but respect LOG_REG and LOG_VREG.
void LogWrittenRegisters() {
- if (GetTraceParameters() & LOG_REGS) PrintWrittenRegisters();
+ if (ShouldTraceRegs()) PrintWrittenRegisters();
}
void LogWrittenVRegisters() {
- if (GetTraceParameters() & LOG_VREGS) PrintWrittenVRegisters();
+ if (ShouldTraceVRegs()) PrintWrittenVRegisters();
+ }
+ void LogWrittenPRegisters() {
+ if (ShouldTraceVRegs()) PrintWrittenPRegisters();
}
void LogAllWrittenRegisters() {
LogWrittenRegisters();
LogWrittenVRegisters();
+ LogWrittenPRegisters();
+ }
+
+ // The amount of space to leave for a register name. This is used to keep the
+ // values vertically aligned. The longest register name has the form
+ // "z31<2047:1920>". The total overall value indentation must also take into
+ // account the fixed formatting: "# {name}: 0x{value}".
+ static const int kPrintRegisterNameFieldWidth = 14;
+
+ // Print whole, individual register values.
+ // - The format can be used to restrict how much of the register is printed,
+ // but such formats indicate that the unprinted high-order bits are zero and
+ // these helpers will assert that.
+ // - If the format includes the kPrintRegAsFP flag then human-friendly FP
+ // value annotations will be printed.
+ // - The suffix can be used to add annotations (such as memory access
+ // details), or to suppress the newline.
+ void PrintRegister(int code,
+ PrintRegisterFormat format = kPrintXReg,
+ const char* suffix = "\n");
+ void PrintVRegister(int code,
+ PrintRegisterFormat format = kPrintReg1Q,
+ const char* suffix = "\n");
+ // PrintZRegister and PrintPRegister print over several lines, so they cannot
+ // allow the suffix to be overridden.
+ void PrintZRegister(int code, PrintRegisterFormat format = kPrintRegVnQ);
+ void PrintPRegister(int code, PrintRegisterFormat format = kPrintRegVnQ);
+ void PrintFFR(PrintRegisterFormat format = kPrintRegVnQ);
+ // Print a single Q-sized part of a Z register, or the corresponding two-byte
+ // part of a P register. These print single lines, and therefore allow the
+ // suffix to be overridden. The format must include the kPrintRegPartial flag.
+ void PrintPartialZRegister(int code,
+ int q_index,
+ PrintRegisterFormat format = kPrintRegVnQ,
+ const char* suffix = "\n");
+ void PrintPartialPRegister(int code,
+ int q_index,
+ PrintRegisterFormat format = kPrintRegVnQ,
+ const char* suffix = "\n");
+ void PrintPartialPRegister(const char* name,
+ const SimPRegister& reg,
+ int q_index,
+ PrintRegisterFormat format = kPrintRegVnQ,
+ const char* suffix = "\n");
+
+ // Like Print*Register (above), but respect trace parameters.
+ void LogRegister(unsigned code, PrintRegisterFormat format) {
+ if (ShouldTraceRegs()) PrintRegister(code, format);
+ }
+ void LogVRegister(unsigned code, PrintRegisterFormat format) {
+ if (ShouldTraceVRegs()) PrintVRegister(code, format);
+ }
+ void LogZRegister(unsigned code, PrintRegisterFormat format) {
+ if (ShouldTraceVRegs()) PrintZRegister(code, format);
+ }
+ void LogPRegister(unsigned code, PrintRegisterFormat format) {
+ if (ShouldTraceVRegs()) PrintPRegister(code, format);
+ }
+ void LogFFR(PrintRegisterFormat format) {
+ if (ShouldTraceVRegs()) PrintFFR(format);
}
- // Print individual register values (after update).
- void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer);
- void PrintVRegister(unsigned code, PrintRegisterFormat format);
+ // Other state updates, including system registers.
void PrintSystemRegister(SystemRegister id);
void PrintTakenBranch(const Instruction* target);
+ void LogSystemRegister(SystemRegister id) {
+ if (ShouldTraceSysRegs()) PrintSystemRegister(id);
+ }
+ void LogTakenBranch(const Instruction* target) {
+ if (ShouldTraceBranches()) PrintTakenBranch(target);
+ }
- // Like Print* (above), but respect GetTraceParameters().
- void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) {
- if (GetTraceParameters() & LOG_REGS) PrintRegister(code, r31mode);
+ // Trace memory accesses.
+
+ // Common, contiguous register accesses (such as for scalars).
+ // The *Write variants automatically set kPrintRegPartial on the format.
+ void PrintRead(int rt_code, PrintRegisterFormat format, uintptr_t address);
+ void PrintExtendingRead(int rt_code,
+ PrintRegisterFormat format,
+ int access_size_in_bytes,
+ uintptr_t address);
+ void PrintWrite(int rt_code, PrintRegisterFormat format, uintptr_t address);
+ void PrintVRead(int rt_code, PrintRegisterFormat format, uintptr_t address);
+ void PrintVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address);
+ // Simple, unpredicated SVE accesses always access the whole vector, and never
+ // know the lane type, so there's no need to accept a `format`.
+ void PrintZRead(int rt_code, uintptr_t address) {
+ vregisters_[rt_code].NotifyRegisterLogged();
+ PrintZAccess(rt_code, "<-", address);
}
- void LogVRegister(unsigned code, PrintRegisterFormat format) {
- if (GetTraceParameters() & LOG_VREGS) PrintVRegister(code, format);
+ void PrintZWrite(int rt_code, uintptr_t address) {
+ PrintZAccess(rt_code, "->", address);
}
- void LogSystemRegister(SystemRegister id) {
- if (GetTraceParameters() & LOG_SYSREGS) PrintSystemRegister(id);
+ void PrintPRead(int rt_code, uintptr_t address) {
+ pregisters_[rt_code].NotifyRegisterLogged();
+ PrintPAccess(rt_code, "<-", address);
+ }
+ void PrintPWrite(int rt_code, uintptr_t address) {
+ PrintPAccess(rt_code, "->", address);
}
- void LogTakenBranch(const Instruction* target) {
- if (GetTraceParameters() & LOG_BRANCH) PrintTakenBranch(target);
- }
-
- // Print memory accesses.
- void PrintRead(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format);
- void PrintWrite(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format);
- void PrintVRead(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format,
- unsigned lane);
- void PrintVWrite(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format,
- unsigned lane);
// Like Print* (above), but respect GetTraceParameters().
- void LogRead(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format) {
- if (GetTraceParameters() & LOG_REGS) PrintRead(address, reg_code, format);
- }
- void LogWrite(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format) {
- if (GetTraceParameters() & LOG_WRITE) PrintWrite(address, reg_code, format);
- }
- void LogVRead(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format,
- unsigned lane = 0) {
- if (GetTraceParameters() & LOG_VREGS) {
- PrintVRead(address, reg_code, format, lane);
+ void LogRead(int rt_code, PrintRegisterFormat format, uintptr_t address) {
+ if (ShouldTraceRegs()) PrintRead(rt_code, format, address);
+ }
+ void LogExtendingRead(int rt_code,
+ PrintRegisterFormat format,
+ int access_size_in_bytes,
+ uintptr_t address) {
+ if (ShouldTraceRegs()) {
+ PrintExtendingRead(rt_code, format, access_size_in_bytes, address);
}
}
- void LogVWrite(uintptr_t address,
- unsigned reg_code,
- PrintRegisterFormat format,
- unsigned lane = 0) {
- if (GetTraceParameters() & LOG_WRITE) {
- PrintVWrite(address, reg_code, format, lane);
- }
+ void LogWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) {
+ if (ShouldTraceWrites()) PrintWrite(rt_code, format, address);
+ }
+ void LogVRead(int rt_code, PrintRegisterFormat format, uintptr_t address) {
+ if (ShouldTraceVRegs()) PrintVRead(rt_code, format, address);
+ }
+ void LogVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) {
+ if (ShouldTraceWrites()) PrintVWrite(rt_code, format, address);
+ }
+ void LogZRead(int rt_code, uintptr_t address) {
+ if (ShouldTraceVRegs()) PrintZRead(rt_code, address);
+ }
+ void LogZWrite(int rt_code, uintptr_t address) {
+ if (ShouldTraceWrites()) PrintZWrite(rt_code, address);
+ }
+ void LogPRead(int rt_code, uintptr_t address) {
+ if (ShouldTraceVRegs()) PrintPRead(rt_code, address);
+ }
+ void LogPWrite(int rt_code, uintptr_t address) {
+ if (ShouldTraceWrites()) PrintPWrite(rt_code, address);
+ }
+
+ // Helpers for the above, where the access operation is parameterised.
+ // - For loads, set op = "<-".
+ // - For stores, set op = "->".
+ void PrintAccess(int rt_code,
+ PrintRegisterFormat format,
+ const char* op,
+ uintptr_t address);
+ void PrintVAccess(int rt_code,
+ PrintRegisterFormat format,
+ const char* op,
+ uintptr_t address);
+ // Simple, unpredicated SVE accesses always access the whole vector, and never
+ // know the lane type, so these don't accept a `format`.
+ void PrintZAccess(int rt_code, const char* op, uintptr_t address);
+ void PrintPAccess(int rt_code, const char* op, uintptr_t address);
+
+ // Multiple-structure accesses.
+ void PrintVStructAccess(int rt_code,
+ int reg_count,
+ PrintRegisterFormat format,
+ const char* op,
+ uintptr_t address);
+ // Single-structure (single-lane) accesses.
+ void PrintVSingleStructAccess(int rt_code,
+ int reg_count,
+ int lane,
+ PrintRegisterFormat format,
+ const char* op,
+ uintptr_t address);
+ // Replicating accesses.
+ void PrintVReplicatingStructAccess(int rt_code,
+ int reg_count,
+ PrintRegisterFormat format,
+ const char* op,
+ uintptr_t address);
+
+ // Multiple-structure accesses.
+ void PrintZStructAccess(int rt_code,
+ int reg_count,
+ const LogicPRegister& pg,
+ PrintRegisterFormat format,
+ int msize_in_bytes,
+ const char* op,
+ const LogicSVEAddressVector& addr);
+
+ // Register-printing helper for all structured accessors.
+ //
+ // All lanes (according to `format`) are printed, but lanes indicated by
+ // `focus_mask` are of particular interest. Each bit corresponds to a byte in
+ // the printed register, in a manner similar to SVE's predicates. Currently,
+ // this is used to determine when to print human-readable FP annotations.
+ void PrintVRegistersForStructuredAccess(int rt_code,
+ int reg_count,
+ uint16_t focus_mask,
+ PrintRegisterFormat format);
+
+ // As for the VRegister variant, but print partial Z register names.
+ void PrintZRegistersForStructuredAccess(int rt_code,
+ int q_index,
+ int reg_count,
+ uint16_t focus_mask,
+ PrintRegisterFormat format);
+
+ // Print part of a memory access. This should be used for annotating
+ // non-trivial accesses, such as structured or sign-extending loads. Call
+ // Print*Register (or Print*RegistersForStructuredAccess), then
+ // PrintPartialAccess for each contiguous access that makes up the
+ // instruction.
+ //
+ // access_mask:
+ // The lanes to be printed. Each bit corresponds to a byte in the printed
+ // register, in a manner similar to SVE's predicates, except that the
+  //    lane size is not respected when interpreting access_mask: unaligned
+  //    bits must be zeroed.
+ //
+ // This function asserts that this mask is non-zero.
+ //
+ // future_access_mask:
+ // The lanes to be printed by a future invocation. This must be specified
+ // because vertical lines are drawn for partial accesses that haven't yet
+ // been printed. The format is the same as for accessed_mask.
+ //
+ // If a lane is active in both `access_mask` and `future_access_mask`,
+ // `access_mask` takes precedence.
+ //
+ // struct_element_count:
+ // The number of elements in each structure. For non-structured accesses,
+  //    set this to one. Along with lane_size_in_bytes, this is used to
+  //    determine the size of each access, and to format the accessed value.
+ //
+ // op:
+ // For stores, use "->". For loads, use "<-".
+ //
+ // address:
+ // The address of this partial access. (Not the base address of the whole
+ // instruction.) The traced value is read from this address (according to
+ // part_count and lane_size_in_bytes) so it must be accessible, and when
+ // tracing stores, the store must have been executed before this function
+ // is called.
+ //
+ // reg_size_in_bytes:
+ // The size of the register being accessed. This helper is usually used
+ // for V registers or Q-sized chunks of Z registers, so that is the
+ // default, but it is possible to use this to annotate X register
+ // accesses by specifying kXRegSizeInBytes.
+ //
+ // The return value is a future_access_mask suitable for the next iteration,
+ // so that it is possible to execute this in a loop, until the mask is zero.
+  // Note that access_mask must still be updated by the caller for each call.
+ uint16_t PrintPartialAccess(uint16_t access_mask,
+ uint16_t future_access_mask,
+ int struct_element_count,
+ int lane_size_in_bytes,
+ const char* op,
+ uintptr_t address,
+ int reg_size_in_bytes = kQRegSizeInBytes);
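
A sketch of the caller-side bookkeeping described above: each iteration peels one chunk off the set of lanes still to be printed and passes the remainder as the future mask, stopping when nothing is left. The one-lane-per-call chunking here is only illustrative; real callers may group lanes differently.

#include <cstdint>
#include <cstdio>

// Sketch: split the lanes still to print into per-call access masks, keeping
// a future mask for the lanes handled by later calls.
int main() {
  uint16_t remaining = 0x1111;  // e.g. the four S lanes of a Q-sized chunk.
  while (remaining != 0) {
    uint16_t access_mask =
        static_cast<uint16_t>(remaining & (~remaining + 1));  // Lowest lane.
    uint16_t future_access_mask =
        static_cast<uint16_t>(remaining & ~access_mask);
    printf("access 0x%04x, still pending 0x%04x\n",
           static_cast<unsigned>(access_mask),
           static_cast<unsigned>(future_access_mask));
    remaining = future_access_mask;
  }
  return 0;
}
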
+
+ // Print an abstract register value. This works for all register types, and
+ // can print parts of registers. This exists to ensure consistent formatting
+ // of values.
+ void PrintRegisterValue(const uint8_t* value,
+ int value_size,
+ PrintRegisterFormat format);
+ template <typename T>
+ void PrintRegisterValue(const T& sim_register, PrintRegisterFormat format) {
+ PrintRegisterValue(sim_register.GetBytes(),
+ std::min(sim_register.GetSizeInBytes(),
+ kQRegSizeInBytes),
+ format);
}
- // Helper functions for register tracing.
- void PrintRegisterRawHelper(unsigned code,
- Reg31Mode r31mode,
- int size_in_bytes = kXRegSizeInBytes);
- void PrintVRegisterRawHelper(unsigned code,
- int bytes = kQRegSizeInBytes,
- int lsb = 0);
- void PrintVRegisterFPHelper(unsigned code,
- unsigned lane_size_in_bytes,
- int lane_count = 1,
- int rightmost_lane = 0);
+ // As above, but format as an SVE predicate value, using binary notation with
+ // spaces between each bit so that they align with the Z register bytes that
+ // they predicate.
+ void PrintPRegisterValue(uint16_t value);
+
+ void PrintRegisterValueFPAnnotations(const uint8_t* value,
+ uint16_t lane_mask,
+ PrintRegisterFormat format);
+ template <typename T>
+ void PrintRegisterValueFPAnnotations(const T& sim_register,
+ uint16_t lane_mask,
+ PrintRegisterFormat format) {
+ PrintRegisterValueFPAnnotations(sim_register.GetBytes(), lane_mask, format);
+ }
+ template <typename T>
+ void PrintRegisterValueFPAnnotations(const T& sim_register,
+ PrintRegisterFormat format) {
+ PrintRegisterValueFPAnnotations(sim_register.GetBytes(),
+ GetPrintRegLaneMask(format),
+ format);
+ }
VIXL_NO_RETURN void DoUnreachable(const Instruction* instr);
void DoTrace(const Instruction* instr);
@@ -1587,10 +2324,13 @@ class Simulator : public DecoderVisitor {
Reg31Mode mode = Reg31IsZeroRegister);
static const char* XRegNameForCode(unsigned code,
Reg31Mode mode = Reg31IsZeroRegister);
+ static const char* BRegNameForCode(unsigned code);
static const char* HRegNameForCode(unsigned code);
static const char* SRegNameForCode(unsigned code);
static const char* DRegNameForCode(unsigned code);
static const char* VRegNameForCode(unsigned code);
+ static const char* ZRegNameForCode(unsigned code);
+ static const char* PRegNameForCode(unsigned code);
bool IsColouredTrace() const { return coloured_trace_; }
VIXL_DEPRECATED("IsColouredTrace", bool coloured_trace() const) {
@@ -1609,18 +2349,28 @@ class Simulator : public DecoderVisitor {
return GetTraceParameters();
}
+ bool ShouldTraceWrites() const {
+ return (GetTraceParameters() & LOG_WRITE) != 0;
+ }
+ bool ShouldTraceRegs() const {
+ return (GetTraceParameters() & LOG_REGS) != 0;
+ }
+ bool ShouldTraceVRegs() const {
+ return (GetTraceParameters() & LOG_VREGS) != 0;
+ }
+ bool ShouldTraceSysRegs() const {
+ return (GetTraceParameters() & LOG_SYSREGS) != 0;
+ }
+ bool ShouldTraceBranches() const {
+ return (GetTraceParameters() & LOG_BRANCH) != 0;
+ }
+
void SetTraceParameters(int parameters);
VIXL_DEPRECATED("SetTraceParameters",
void set_trace_parameters(int parameters)) {
SetTraceParameters(parameters);
}
- void SetInstructionStats(bool value);
- VIXL_DEPRECATED("SetInstructionStats",
- void set_instruction_stats(bool value)) {
- SetInstructionStats(value);
- }
-
// Clear the simulated local monitor to force the next store-exclusive
// instruction to fail.
void ClearLocalMonitor() { local_monitor_.Clear(); }
@@ -1803,6 +2553,92 @@ class Simulator : public DecoderVisitor {
};
#endif
+ // Configure the simulated value of 'VL', which is the size of a Z register.
+  // Because changing VL cannot occur during a program's lifetime, this
+  // function also resets the SVE registers.
+ void SetVectorLengthInBits(unsigned vector_length);
+
+ unsigned GetVectorLengthInBits() const { return vector_length_; }
+ unsigned GetVectorLengthInBytes() const {
+ VIXL_ASSERT((vector_length_ % kBitsPerByte) == 0);
+ return vector_length_ / kBitsPerByte;
+ }
+ unsigned GetPredicateLengthInBits() const {
+ VIXL_ASSERT((GetVectorLengthInBits() % kZRegBitsPerPRegBit) == 0);
+ return GetVectorLengthInBits() / kZRegBitsPerPRegBit;
+ }
+ unsigned GetPredicateLengthInBytes() const {
+ VIXL_ASSERT((GetVectorLengthInBytes() % kZRegBitsPerPRegBit) == 0);
+ return GetVectorLengthInBytes() / kZRegBitsPerPRegBit;
+ }
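
Each predicate bit governs one byte of a Z register, so the predicate length is the vector length divided by eight. The sketch below assumes kZRegBitsPerPRegBit is 8 and an illustrative simulated VL of 512 bits.

#include <cassert>

// Sketch: relate the simulated vector length to the predicate length.
int main() {
  const unsigned kBitsPerByte = 8;
  const unsigned kZRegBitsPerPRegBit = 8;  // One predicate bit per Z byte.

  unsigned vl_in_bits = 512;                                   // Simulated VL.
  unsigned vl_in_bytes = vl_in_bits / kBitsPerByte;            // 64
  unsigned pl_in_bits = vl_in_bits / kZRegBitsPerPRegBit;      // 64
  unsigned pl_in_bytes = vl_in_bytes / kZRegBitsPerPRegBit;    // 8

  assert(vl_in_bytes == 64);
  assert(pl_in_bits == 64);
  assert(pl_in_bytes == 8);
  return 0;
}
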
+
+ unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) const {
+ if (IsSVEFormat(vform)) {
+ return GetVectorLengthInBits();
+ } else {
+ return vixl::aarch64::RegisterSizeInBitsFromFormat(vform);
+ }
+ }
+
+ unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) const {
+ unsigned size_in_bits = RegisterSizeInBitsFromFormat(vform);
+ VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0);
+ return size_in_bits / kBitsPerByte;
+ }
+
+ int LaneCountFromFormat(VectorFormat vform) const {
+ if (IsSVEFormat(vform)) {
+ return GetVectorLengthInBits() / LaneSizeInBitsFromFormat(vform);
+ } else {
+ return vixl::aarch64::LaneCountFromFormat(vform);
+ }
+ }
+
+ bool IsFirstActive(VectorFormat vform,
+ const LogicPRegister& mask,
+ const LogicPRegister& bits) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (mask.IsActive(vform, i)) {
+ return bits.IsActive(vform, i);
+ }
+ }
+ return false;
+ }
+
+ bool AreNoneActive(VectorFormat vform,
+ const LogicPRegister& mask,
+ const LogicPRegister& bits) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (mask.IsActive(vform, i) && bits.IsActive(vform, i)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ bool IsLastActive(VectorFormat vform,
+ const LogicPRegister& mask,
+ const LogicPRegister& bits) {
+ for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
+ if (mask.IsActive(vform, i)) {
+ return bits.IsActive(vform, i);
+ }
+ }
+ return false;
+ }
+
+ void PredTest(VectorFormat vform,
+ const LogicPRegister& mask,
+ const LogicPRegister& bits) {
+ ReadNzcv().SetN(IsFirstActive(vform, mask, bits));
+ ReadNzcv().SetZ(AreNoneActive(vform, mask, bits));
+ ReadNzcv().SetC(!IsLastActive(vform, mask, bits));
+ ReadNzcv().SetV(0);
+ LogSystemRegister(NZCV);
+ }
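
PredTest sets N from the first governed lane, Z when no governed lane is active, C when the last governed lane is inactive, and clears V. A standalone sketch computing those flags for a four-lane example with illustrative mask and bit values.

#include <cassert>

// Sketch: compute the NZCV flags PredTest produces for a 4-lane example.
int main() {
  const int kLaneCount = 4;
  bool mask[kLaneCount] = {true, false, true, true};    // Governing predicate.
  bool bits[kLaneCount] = {true, false, false, false};  // Predicate under test.

  bool n = false, z = true, c = true;
  for (int i = 0; i < kLaneCount; i++) {       // Z: no governed lane active.
    if (mask[i] && bits[i]) z = false;
  }
  for (int i = 0; i < kLaneCount; i++) {       // N: first governed lane.
    if (mask[i]) { n = bits[i]; break; }
  }
  for (int i = kLaneCount - 1; i >= 0; i--) {  // C: last governed lane clear.
    if (mask[i]) { c = !bits[i]; break; }
  }
  bool v = false;

  assert(n);   // First governed lane (lane 0) is active.
  assert(!z);  // At least one governed lane is active.
  assert(c);   // Last governed lane (lane 3) is inactive.
  assert(!v);
  return 0;
}
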
+
+ SimPRegister& GetPTrue() { return pregister_all_true_; }
+
protected:
const char* clr_normal;
const char* clr_flag_name;
@@ -1811,6 +2647,8 @@ class Simulator : public DecoderVisitor {
const char* clr_reg_value;
const char* clr_vreg_name;
const char* clr_vreg_value;
+ const char* clr_preg_name;
+ const char* clr_preg_value;
const char* clr_memory_address;
const char* clr_warning;
const char* clr_warning_message;
@@ -1818,6 +2656,13 @@ class Simulator : public DecoderVisitor {
const char* clr_branch_marker;
// Simulation helpers ------------------------------------
+
+ void ResetSystemRegisters();
+ void ResetRegisters();
+ void ResetVRegisters();
+ void ResetPRegisters();
+ void ResetFFR();
+
bool ConditionPassed(Condition cond) {
switch (cond) {
case eq:
@@ -1907,7 +2752,7 @@ class Simulator : public DecoderVisitor {
}
int64_t ShiftOperand(unsigned reg_size,
- int64_t value,
+ uint64_t value,
Shift shift_type,
unsigned amount) const;
int64_t ExtendValue(unsigned reg_width,
@@ -1919,6 +2764,11 @@ class Simulator : public DecoderVisitor {
void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);
void ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);
void ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr);
+ void ld1r(VectorFormat vform,
+ VectorFormat unpack_vform,
+ LogicVRegister dst,
+ uint64_t addr,
+ bool is_signed = false);
void ld2(VectorFormat vform,
LogicVRegister dst1,
LogicVRegister dst2,
@@ -2020,16 +2870,43 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
+ // Add `value` to each lane of `src1`, treating `value` as unsigned for the
+ // purposes of setting the saturation flags.
+ LogicVRegister add_uint(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ uint64_t value);
LogicVRegister addp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
+ LogicPRegister brka(LogicPRegister pd,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn);
+ LogicPRegister brkb(LogicPRegister pd,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn);
+ LogicPRegister brkn(LogicPRegister pdm,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn);
+ LogicPRegister brkpa(LogicPRegister pd,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn,
+ const LogicPRegister& pm);
+ LogicPRegister brkpb(LogicPRegister pd,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn,
+ const LogicPRegister& pm);
+ // dst = srca + src1 * src2
LogicVRegister mla(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
+ // dst = srca - src1 * src2
LogicVRegister mls(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister mul(VectorFormat vform,
@@ -2055,6 +2932,14 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
+ LogicVRegister sdiv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister udiv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform,
LogicVRegister dst,
@@ -2101,6 +2986,10 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
+ LogicVRegister smulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
LogicVRegister smull(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -2161,6 +3050,10 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
+ LogicVRegister umulh(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
LogicVRegister sqdmull(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -2225,6 +3118,12 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
+ // Subtract `value` from each lane of `src1`, treating `value` as unsigned for
+ // the purposes of setting the saturation flags.
+ LogicVRegister sub_uint(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ uint64_t value);
LogicVRegister and_(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -2267,6 +3166,9 @@ class Simulator : public DecoderVisitor {
LogicVRegister clz(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
+ LogicVRegister cnot(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
LogicVRegister cnt(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
@@ -2278,8 +3180,11 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src);
LogicVRegister rev(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src,
- int revSize);
+ const LogicVRegister& src);
+ LogicVRegister rev_byte(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int rev_size);
LogicVRegister rev16(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
@@ -2327,6 +3232,7 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
+ const LogicVRegister& acc,
int index,
int rot);
LogicVRegister fcmla(VectorFormat vform,
@@ -2335,17 +3241,25 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src2,
int index,
int rot);
- template <typename T>
LogicVRegister fcmla(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
+ const LogicVRegister& acc,
int rot);
- LogicVRegister fcmla(VectorFormat vform,
+ template <typename T>
+ LogicVRegister fadda(VectorFormat vform,
+ LogicVRegister acc,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister fadda(VectorFormat vform,
+ LogicVRegister acc,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister index(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int rot);
+ uint64_t start,
+ uint64_t step);
LogicVRegister ins_element(VectorFormat vform,
LogicVRegister dst,
int dst_index,
@@ -2355,13 +3269,36 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
int dst_index,
uint64_t imm);
+ LogicVRegister insr(VectorFormat vform, LogicVRegister dst, uint64_t imm);
LogicVRegister dup_element(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int src_index);
+ LogicVRegister dup_elements_to_segments(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int src_index);
LogicVRegister dup_immediate(VectorFormat vform,
LogicVRegister dst,
uint64_t imm);
+ LogicVRegister mov(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicPRegister mov(LogicPRegister dst, const LogicPRegister& src);
+ LogicVRegister mov_merging(VectorFormat vform,
+ LogicVRegister dst,
+ const SimPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister mov_zeroing(VectorFormat vform,
+ LogicVRegister dst,
+ const SimPRegister& pg,
+ const LogicVRegister& src);
+ LogicPRegister mov_merging(LogicPRegister dst,
+ const LogicPRegister& pg,
+ const LogicPRegister& src);
+ LogicPRegister mov_zeroing(LogicPRegister dst,
+ const LogicPRegister& pg,
+ const LogicPRegister& src);
LogicVRegister movi(VectorFormat vform, LogicVRegister dst, uint64_t imm);
LogicVRegister mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm);
LogicVRegister orr(VectorFormat vform,
@@ -2376,6 +3313,32 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
+ // Perform a "conditional last" operation. The first part of the pair is true
+ // if any predicate lane is active, false otherwise. The second part takes the
+  // value of the last active (plus offset) lane, or of the last (plus offset)
+  // lane if none are active.
+ std::pair<bool, uint64_t> clast(VectorFormat vform,
+ const LogicPRegister& pg,
+ const LogicVRegister& src2,
+ int offset_from_last_active);
+ LogicVRegister compact(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister splice(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister sel(VectorFormat vform,
+ LogicVRegister dst,
+ const SimPRegister& pg,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicPRegister sel(LogicPRegister dst,
+ const LogicPRegister& pg,
+ const LogicPRegister& src1,
+ const LogicPRegister& src2);
LogicVRegister sminmax(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -2416,6 +3379,7 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src);
LogicVRegister sminmaxv(VectorFormat vform,
LogicVRegister dst,
+ const LogicPRegister& pg,
const LogicVRegister& src,
bool max);
LogicVRegister smaxv(VectorFormat vform,
@@ -2436,6 +3400,14 @@ class Simulator : public DecoderVisitor {
LogicVRegister sxtl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
+ LogicVRegister uxt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ unsigned from_size_in_bits);
+ LogicVRegister sxt(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ unsigned from_size_in_bits);
LogicVRegister tbl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& tab,
@@ -2460,6 +3432,10 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& ind);
LogicVRegister Table(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& src,
+ const LogicVRegister& tab);
+ LogicVRegister Table(VectorFormat vform,
+ LogicVRegister dst,
const LogicVRegister& ind,
bool zero_out_of_bounds,
const LogicVRegister* tab1,
@@ -2580,6 +3556,7 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src2);
LogicVRegister uminmaxv(VectorFormat vform,
LogicVRegister dst,
+ const LogicPRegister& pg,
const LogicVRegister& src,
bool max);
LogicVRegister umaxv(VectorFormat vform,
@@ -2617,11 +3594,27 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src,
int shift);
LogicVRegister scvtf(VectorFormat vform,
+ unsigned dst_data_size_in_bits,
+ unsigned src_data_size_in_bits,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src,
+ FPRounding round,
+ int fbits = 0);
+ LogicVRegister scvtf(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int fbits,
FPRounding rounding_mode);
LogicVRegister ucvtf(VectorFormat vform,
+ unsigned dst_data_size,
+ unsigned src_data_size,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src,
+ FPRounding round,
+ int fbits = 0);
+ LogicVRegister ucvtf(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int fbits,
@@ -2706,9 +3699,9 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src);
LogicVRegister extractnarrow(VectorFormat vform,
LogicVRegister dst,
- bool dstIsSigned,
+ bool dst_is_signed,
const LogicVRegister& src,
- bool srcIsSigned);
+ bool src_is_signed);
LogicVRegister xtn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
@@ -2725,7 +3718,7 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
- bool issigned);
+ bool is_signed);
LogicVRegister saba(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -2951,19 +3944,23 @@ class Simulator : public DecoderVisitor {
template <typename T>
LogicVRegister fmla(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister fmla(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
template <typename T>
LogicVRegister fmls(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister fmls(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& srca,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister fnmul(VectorFormat vform,
@@ -3023,6 +4020,31 @@ class Simulator : public DecoderVisitor {
LogicVRegister frecpx(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
+ LogicVRegister ftsmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ftssel(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ftmad(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ unsigned index);
+ LogicVRegister fexpa(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+ template <typename T>
+ LogicVRegister fscale(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister fscale(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
template <typename T>
LogicVRegister fabs_(VectorFormat vform,
LogicVRegister dst,
@@ -3034,19 +4056,40 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
-
LogicVRegister frint(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
FPRounding rounding_mode,
bool inexact_exception = false,
FrintMode frint_mode = kFrintToInteger);
+ LogicVRegister fcvt(VectorFormat vform,
+ unsigned dst_data_size_in_bits,
+ unsigned src_data_size_in_bits,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister fcvts(VectorFormat vform,
+ unsigned dst_data_size_in_bits,
+ unsigned src_data_size_in_bits,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src,
+ FPRounding round,
+ int fbits = 0);
LogicVRegister fcvts(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
FPRounding rounding_mode,
int fbits = 0);
LogicVRegister fcvtu(VectorFormat vform,
+ unsigned dst_data_size_in_bits,
+ unsigned src_data_size_in_bits,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src,
+ FPRounding round,
+ int fbits = 0);
+ LogicVRegister fcvtu(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
FPRounding rounding_mode,
@@ -3086,16 +4129,78 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src);
+ LogicPRegister pfalse(LogicPRegister dst);
+ LogicPRegister pfirst(LogicPRegister dst,
+ const LogicPRegister& pg,
+ const LogicPRegister& src);
+ LogicPRegister ptrue(VectorFormat vform, LogicPRegister dst, int pattern);
+ LogicPRegister pnext(VectorFormat vform,
+ LogicPRegister dst,
+ const LogicPRegister& pg,
+ const LogicPRegister& src);
+
+ LogicVRegister asrd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ int shift);
+
+ LogicVRegister andv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister eorv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister orv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister saddv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister sminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister smaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister uaddv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister uminv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+ LogicVRegister umaxv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src);
+
template <typename T>
- struct TFPMinMaxOp {
+ struct TFPPairOp {
typedef T (Simulator::*type)(T a, T b);
};
template <typename T>
- LogicVRegister fminmaxv(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src,
- typename TFPMinMaxOp<T>::type Op);
+ LogicVRegister FPPairedAcrossHelper(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ typename TFPPairOp<T>::type fn,
+ uint64_t inactive_value);
+
+ LogicVRegister FPPairedAcrossHelper(
+ VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ typename TFPPairOp<vixl::internal::SimFloat16>::type fn16,
+ typename TFPPairOp<float>::type fn32,
+ typename TFPPairOp<double>::type fn64,
+ uint64_t inactive_value);
LogicVRegister fminv(VectorFormat vform,
LogicVRegister dst,
@@ -3109,6 +4214,9 @@ class Simulator : public DecoderVisitor {
LogicVRegister fmaxnmv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
+ LogicVRegister faddv(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
static const uint32_t CRC32_POLY = 0x04C11DB7;
static const uint32_t CRC32C_POLY = 0x1EDC6F41;
@@ -3209,6 +4317,129 @@ class Simulator : public DecoderVisitor {
void DoSaveCPUFeatures(const Instruction* instr);
void DoRestoreCPUFeatures(const Instruction* instr);
+ // General arithmetic helpers ----------------------------
+
+ // Add `delta` to the accumulator (`acc`), optionally saturate, then zero- or
+ // sign-extend. Initial `acc` bits outside `n` are ignored, but the delta must
+ // be a valid int<n>_t.
+ uint64_t IncDecN(uint64_t acc,
+ int64_t delta,
+ unsigned n,
+ bool is_saturating = false,
+ bool is_signed = false);
+
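
A rough standalone sketch of the behaviour described above, with hypothetical names and restricted to n <= 32 so all intermediate arithmetic fits in int64_t; it is not the simulator's IncDecN:

#include <cassert>
#include <cstdint>

// Add `delta` to the low `n` bits of `acc`, optionally saturating to the n-bit
// signed or unsigned range, then zero- or sign-extend the result to 64 bits.
uint64_t IncDecNSketch(uint64_t acc, int64_t delta, unsigned n,
                       bool saturate, bool is_signed) {
  assert((n >= 1) && (n <= 32));
  const uint64_t mask = (UINT64_C(1) << n) - 1;
  const uint64_t sign_bit = UINT64_C(1) << (n - 1);
  const int64_t smin = -static_cast<int64_t>(sign_bit);
  const int64_t smax = static_cast<int64_t>(sign_bit) - 1;
  const int64_t umax = static_cast<int64_t>(mask);

  // Interpret the low n bits of `acc` as signed or unsigned, then add `delta`.
  int64_t value = static_cast<int64_t>(acc & mask);
  if (is_signed && ((acc & sign_bit) != 0)) {
    value -= static_cast<int64_t>(mask) + 1;
  }
  int64_t sum = value + delta;

  if (saturate) {
    const int64_t lo = is_signed ? smin : 0;
    const int64_t hi = is_signed ? smax : umax;
    if (sum < lo) sum = lo;
    if (sum > hi) sum = hi;
  }

  // Truncate to n bits, then zero- or sign-extend to 64 bits.
  uint64_t bits = static_cast<uint64_t>(sum) & mask;
  if (is_signed && ((bits & sign_bit) != 0)) bits |= ~mask;
  return bits;
}
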
+ // SVE helpers -------------------------------------------
+ LogicVRegister SVEBitwiseLogicalUnpredicatedHelper(LogicalOp op,
+ VectorFormat vform,
+ LogicVRegister zd,
+ const LogicVRegister& zn,
+ const LogicVRegister& zm);
+
+ LogicPRegister SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
+ LogicPRegister Pd,
+ const LogicPRegister& pn,
+ const LogicPRegister& pm);
+
+ LogicVRegister SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,
+ VectorFormat vform,
+ LogicVRegister zd,
+ uint64_t imm);
+ enum UnpackType { kHiHalf, kLoHalf };
+ enum ExtendType { kSignedExtend, kUnsignedExtend };
+ LogicVRegister unpk(VectorFormat vform,
+ LogicVRegister zd,
+ const LogicVRegister& zn,
+ UnpackType unpack_type,
+ ExtendType extend_type);
+
+ LogicPRegister SVEIntCompareVectorsHelper(Condition cc,
+ VectorFormat vform,
+ LogicPRegister dst,
+ const LogicPRegister& mask,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool is_wide_elements = false,
+ FlagsUpdate flags = SetFlags);
+
+ void SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
+ VectorFormat vform,
+ SVEOffsetModifier mod);
+
+ // Store each active zt<i>[lane] to `addr.GetElementAddress(lane, ...)`.
+ //
+ // `zt_code` specifies the code of the first register (zt). Each additional
+ // register (up to `reg_count`) is `(zt_code + i) % 32`.
+ //
+ // This helper calls LogZWrite in the proper way, according to `addr`.
+ void SVEStructuredStoreHelper(VectorFormat vform,
+ const LogicPRegister& pg,
+ unsigned zt_code,
+ const LogicSVEAddressVector& addr);
+ // Load each active zt<i>[lane] from `addr.GetElementAddress(lane, ...)`.
+ void SVEStructuredLoadHelper(VectorFormat vform,
+ const LogicPRegister& pg,
+ unsigned zt_code,
+ const LogicSVEAddressVector& addr,
+ bool is_signed = false);
+
+ enum SVEFaultTolerantLoadType {
+ // - Elements active in both FFR and pg are accessed as usual. If the access
+ // fails, the corresponding lane and all subsequent lanes are filled with
+ // an unpredictable value, and made inactive in FFR.
+ //
+ // - Elements active in FFR but not pg are set to zero.
+ //
+ // - Elements that are not active in FFR are filled with an unpredictable
+ // value, regardless of pg.
+ kSVENonFaultLoad,
+
+ // If type == kSVEFirstFaultLoad, the behaviour is the same, except that the
+ // first active element is always accessed, regardless of FFR, and will
+ // generate a real fault if it is inaccessible. If the lane is not active in
+ // FFR, the actual value loaded into the result is still unpredictable.
+ kSVEFirstFaultLoad
+ };
+
+ // Load with first-faulting or non-faulting load semantics, respecting and
+ // updating FFR.
+ void SVEFaultTolerantLoadHelper(VectorFormat vform,
+ const LogicPRegister& pg,
+ unsigned zt_code,
+ const LogicSVEAddressVector& addr,
+ SVEFaultTolerantLoadType type,
+ bool is_signed);
+
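
A much simplified sketch of the kSVENonFaultLoad behaviour described above, with lanes modelled as plain vectors and the memory access hidden behind a hypothetical callback; the real helper also deals with element sizes, addressing modes and the first-fault variant:

#include <cstddef>
#include <cstdint>
#include <vector>

// Returns true if the simulated access for `lane` succeeds.
using TryLoadFn = bool (*)(size_t lane, uint64_t* value_out);

void NonFaultLoadSketch(std::vector<uint64_t>* dst,
                        std::vector<bool>* ffr,
                        const std::vector<bool>& pg,
                        TryLoadFn try_load) {
  bool faulted = false;
  for (size_t lane = 0; lane < dst->size(); lane++) {
    if (faulted || !(*ffr)[lane]) {
      // Inactive in FFR (or after a failed access): unpredictable value.
      (*dst)[lane] = UINT64_C(0xdeadbeefdeadbeef);
      (*ffr)[lane] = false;
      continue;
    }
    if (!pg[lane]) {
      (*dst)[lane] = 0;  // Active in FFR but not in pg: zero.
      continue;
    }
    uint64_t value;
    if (try_load(lane, &value)) {
      (*dst)[lane] = value;
    } else {
      // Failed access: this lane and all later lanes become inactive in FFR.
      faulted = true;
      (*dst)[lane] = UINT64_C(0xdeadbeefdeadbeef);
      (*ffr)[lane] = false;
    }
  }
}
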
+ LogicVRegister SVEBitwiseShiftHelper(Shift shift_op,
+ VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool is_wide_elements);
+
+ template <typename T>
+ LogicVRegister FTMaddHelper(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ uint64_t coeff_pos,
+ uint64_t coeff_neg);
+
+ // Return the first or last active lane, or -1 if none are active.
+ int GetFirstActive(VectorFormat vform, const LogicPRegister& pg) const;
+ int GetLastActive(VectorFormat vform, const LogicPRegister& pg) const;
+
+ int CountActiveLanes(VectorFormat vform, const LogicPRegister& pg) const;
+
+ // Count the lanes that are both active in `pg` and true in `pn`.
+ int CountActiveAndTrueLanes(VectorFormat vform,
+ const LogicPRegister& pg,
+ const LogicPRegister& pn) const;
+
+ // Count the number of lanes referred to by `pattern`, given the vector
+ // length. If `pattern` is not a recognised SVEPredicateConstraint, this
+ // returns zero.
+ int GetPredicateConstraintLaneCount(VectorFormat vform, int pattern) const;
+
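
For intuition, a standalone sketch of these lane queries with the predicate modelled as a plain std::vector<bool>, one entry per lane; the names and types are illustrative rather than the simulator's:

#include <cstddef>
#include <vector>

int GetFirstActiveSketch(const std::vector<bool>& pg) {
  for (size_t i = 0; i < pg.size(); i++) {
    if (pg[i]) return static_cast<int>(i);
  }
  return -1;  // No active lanes.
}

int GetLastActiveSketch(const std::vector<bool>& pg) {
  for (size_t i = pg.size(); i > 0; i--) {
    if (pg[i - 1]) return static_cast<int>(i - 1);
  }
  return -1;  // No active lanes.
}

int CountActiveLanesSketch(const std::vector<bool>& pg) {
  int count = 0;
  for (bool active : pg) count += active ? 1 : 0;
  return count;
}

// Lanes that are both active in the governing predicate `pg` and true in `pn`.
int CountActiveAndTrueLanesSketch(const std::vector<bool>& pg,
                                  const std::vector<bool>& pn) {
  int count = 0;
  for (size_t i = 0; (i < pg.size()) && (i < pn.size()); i++) {
    if (pg[i] && pn[i]) count++;
  }
  return count;
}
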
// Simulate a runtime call.
void DoRuntimeCall(const Instruction* instr);
@@ -3222,15 +4453,21 @@ class Simulator : public DecoderVisitor {
FILE* stream_;
PrintDisassembler* print_disasm_;
- // Instruction statistics instrumentation.
- Instrument* instrumentation_;
-
// General purpose registers. Register 31 is the stack pointer.
SimRegister registers_[kNumberOfRegisters];
// Vector registers
SimVRegister vregisters_[kNumberOfVRegisters];
+ // SVE predicate registers.
+ SimPRegister pregisters_[kNumberOfPRegisters];
+
+ // SVE first-fault register.
+ SimFFRRegister ffr_register_;
+
+ // A pseudo SVE predicate register with all bits set to true.
+ SimPRegister pregister_all_true_;
+
// Program Status Register.
// bits[31, 27]: Condition flags N, Z, C, and V.
// (Negative, Zero, Carry, Overflow)
@@ -3266,8 +4503,10 @@ class Simulator : public DecoderVisitor {
// Stack
byte* stack_;
static const int stack_protection_size_ = 256;
- // 2 KB stack.
- static const int stack_size_ = 2 * 1024 + 2 * stack_protection_size_;
+ // 8 KB stack.
+ // TODO: Make this configurable, or automatically allocate space as it runs
+ // out (like the OS would try to do).
+ static const int stack_size_ = 8 * 1024 + 2 * stack_protection_size_;
byte* stack_limit_;
Decoder* decoder_;
@@ -3276,6 +4515,10 @@ class Simulator : public DecoderVisitor {
bool pc_modified_;
const Instruction* pc_;
+ // If non-NULL, the last instruction was a movprfx, and validity needs to be
+ // checked.
+ Instruction const* movprfx_;
+
// Branch type register, used for branch target identification.
BType btype_;
@@ -3289,10 +4532,13 @@ class Simulator : public DecoderVisitor {
static const char* xreg_names[];
static const char* wreg_names[];
+ static const char* breg_names[];
static const char* hreg_names[];
static const char* sreg_names[];
static const char* dreg_names[];
static const char* vreg_names[];
+ static const char* zreg_names[];
+ static const char* preg_names[];
private:
static const PACKey kPACKeyIA;
@@ -3301,6 +4547,13 @@ class Simulator : public DecoderVisitor {
static const PACKey kPACKeyDB;
static const PACKey kPACKeyGA;
+ bool CanReadMemory(uintptr_t address, size_t size);
+
+ // CanReadMemory needs dummy file descriptors, so we use a pipe. We can save
+ // some system call overhead by opening them on construction, rather than on
+ // every call to CanReadMemory.
+ int dummy_pipe_fd_[2];
+
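
The pipe members above hint at a classic probing trick: write() from the candidate address into a pipe and check for EFAULT, instead of dereferencing it and risking a crash. A simplified POSIX sketch of that idea (not VIXL's actual implementation; it assumes `size` fits in the pipe buffer and elides error handling):

#include <cerrno>
#include <cstddef>
#include <unistd.h>

bool CanReadMemorySketch(int pipe_fds[2], const void* address, size_t size) {
  ssize_t written = write(pipe_fds[1], address, size);
  if (written < 0) return false;  // EFAULT (or any other error): unreadable.
  // Drain what was just written so the pipe does not fill up over time.
  char buffer[4096];
  ssize_t remaining = written;
  while (remaining > 0) {
    size_t chunk = (remaining < static_cast<ssize_t>(sizeof(buffer)))
                       ? static_cast<size_t>(remaining)
                       : sizeof(buffer);
    ssize_t got = read(pipe_fds[0], buffer, chunk);
    if (got <= 0) break;
    remaining -= got;
  }
  return written == static_cast<ssize_t>(size);
}
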
template <typename T>
static T FPDefaultNaN();
@@ -3353,14 +4606,24 @@ class Simulator : public DecoderVisitor {
}
}
+ // Construct a SimVRegister from a SimPRegister, where each byte-sized lane of
+ // the destination is set to all true (0xff) when the corresponding
+ // predicate flag is set, and false (0x00) otherwise.
+ SimVRegister ExpandToSimVRegister(const SimPRegister& preg);
+
+ // Set each predicate flag in pd where the corresponding lane (of the size
+ // given by vform) in vreg is non-zero, and clear the flag otherwise. This is
+ // almost the opposite operation to ExpandToSimVRegister(), except that any
+ // non-zero lane is interpreted as true.
+ void ExtractFromSimVRegister(VectorFormat vform,
+ SimPRegister& pd, // NOLINT(runtime/references)
+ SimVRegister vreg);
+
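
A compact sketch of the expansion and its near-inverse, modelling the predicate as std::vector<bool> and the vector register as bytes (illustrative types only):

#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint8_t> ExpandToBytes(const std::vector<bool>& preg) {
  std::vector<uint8_t> bytes(preg.size());
  for (size_t i = 0; i < preg.size(); i++) bytes[i] = preg[i] ? 0xff : 0x00;
  return bytes;
}

// Any non-zero lane becomes a true predicate flag.
std::vector<bool> ExtractFromBytes(const std::vector<uint8_t>& bytes) {
  std::vector<bool> preg(bytes.size());
  for (size_t i = 0; i < bytes.size(); i++) preg[i] = (bytes[i] != 0);
  return preg;
}
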
bool coloured_trace_;
// A set of TraceParameters flags.
int trace_parameters_;
- // Indicates whether the instruction instrumentation is active.
- bool instruction_stats_;
-
// Indicates whether the exclusive-access warning has been printed.
bool print_exclusive_access_warning_;
void PrintExclusiveAccessWarning();
@@ -3368,8 +4631,14 @@ class Simulator : public DecoderVisitor {
CPUFeaturesAuditor cpu_features_auditor_;
std::vector<CPUFeatures> saved_cpu_features_;
- // The simulated state of RNDR and RNDRRS for generating a random number.
- uint16_t rndr_state_[3];
+ // State for *rand48 functions, used to simulate randomness with repeatable
+ // behaviour (so that tests are deterministic). This is used to simulate RNDR
+ // and RNDRRS, as well as to simulate a source of entropy for architecturally
+ // undefined behaviour.
+ uint16_t rand_state_[3];
+
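
For reference, the *rand48 family takes the generator state as an explicit argument, which is what makes the sequence repeatable for a given seed. A small POSIX sketch (not VIXL code) that draws 64 pseudo-random bits from such a state:

#include <cstdint>
#include <stdlib.h>  // For nrand48() (POSIX).

uint64_t NextPseudoRandom64(unsigned short state[3]) {
  // nrand48() returns 31 pseudo-random bits per call and advances `state`;
  // combine three calls to cover 64 bits. No global state is touched.
  uint64_t hi = static_cast<uint64_t>(nrand48(state));
  uint64_t mid = static_cast<uint64_t>(nrand48(state));
  uint64_t lo = static_cast<uint64_t>(nrand48(state));
  return (hi << 62) ^ (mid << 31) ^ lo;
}
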
+ // A configurable size of SVE vector registers.
+ unsigned vector_length_;
};
#if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) && __cplusplus < 201402L
diff --git a/src/aarch64/simulator-constants-aarch64.h b/src/aarch64/simulator-constants-aarch64.h
index 3256f30e..e2389f11 100644
--- a/src/aarch64/simulator-constants-aarch64.h
+++ b/src/aarch64/simulator-constants-aarch64.h
@@ -121,7 +121,7 @@ const unsigned kTraceLength = 3 * kInstructionSize;
enum TraceParameters {
LOG_DISASM = 1 << 0, // Log disassembly.
LOG_REGS = 1 << 1, // Log general purpose registers.
- LOG_VREGS = 1 << 2, // Log NEON and floating-point registers.
+ LOG_VREGS = 1 << 2, // Log SVE, NEON and floating-point registers.
LOG_SYSREGS = 1 << 3, // Log the flags and system registers.
LOG_WRITE = 1 << 4, // Log writes to memory.
LOG_BRANCH = 1 << 5, // Log taken branches.
diff --git a/src/cpu-features.cc b/src/cpu-features.cc
index ea1e0d3e..08db3f44 100644
--- a/src/cpu-features.cc
+++ b/src/cpu-features.cc
@@ -37,31 +37,9 @@
namespace vixl {
-static uint64_t MakeFeatureMask(CPUFeatures::Feature feature) {
- if (feature == CPUFeatures::kNone) {
- return 0;
- } else {
- // Check that the shift is well-defined, and that the feature is valid.
- VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures <=
- (sizeof(uint64_t) * 8));
- VIXL_ASSERT(feature < CPUFeatures::kNumberOfFeatures);
- return UINT64_C(1) << feature;
- }
-}
-
-CPUFeatures::CPUFeatures(Feature feature0,
- Feature feature1,
- Feature feature2,
- Feature feature3)
- : features_(0) {
- Combine(feature0, feature1, feature2, feature3);
-}
-
CPUFeatures CPUFeatures::All() {
CPUFeatures all;
- // Check that the shift is well-defined.
- VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures < (sizeof(uint64_t) * 8));
- all.features_ = (UINT64_C(1) << kNumberOfFeatures) - 1;
+ all.features_.set();
return all;
}
@@ -89,74 +67,27 @@ void CPUFeatures::Combine(const CPUFeatures& other) {
features_ |= other.features_;
}
-void CPUFeatures::Combine(Feature feature0,
- Feature feature1,
- Feature feature2,
- Feature feature3) {
- features_ |= MakeFeatureMask(feature0);
- features_ |= MakeFeatureMask(feature1);
- features_ |= MakeFeatureMask(feature2);
- features_ |= MakeFeatureMask(feature3);
+void CPUFeatures::Combine(Feature feature) {
+ if (feature != CPUFeatures::kNone) features_.set(feature);
}
void CPUFeatures::Remove(const CPUFeatures& other) {
features_ &= ~other.features_;
}
-void CPUFeatures::Remove(Feature feature0,
- Feature feature1,
- Feature feature2,
- Feature feature3) {
- features_ &= ~MakeFeatureMask(feature0);
- features_ &= ~MakeFeatureMask(feature1);
- features_ &= ~MakeFeatureMask(feature2);
- features_ &= ~MakeFeatureMask(feature3);
-}
-
-CPUFeatures CPUFeatures::With(const CPUFeatures& other) const {
- CPUFeatures f(*this);
- f.Combine(other);
- return f;
-}
-
-CPUFeatures CPUFeatures::With(Feature feature0,
- Feature feature1,
- Feature feature2,
- Feature feature3) const {
- CPUFeatures f(*this);
- f.Combine(feature0, feature1, feature2, feature3);
- return f;
-}
-
-CPUFeatures CPUFeatures::Without(const CPUFeatures& other) const {
- CPUFeatures f(*this);
- f.Remove(other);
- return f;
-}
-
-CPUFeatures CPUFeatures::Without(Feature feature0,
- Feature feature1,
- Feature feature2,
- Feature feature3) const {
- CPUFeatures f(*this);
- f.Remove(feature0, feature1, feature2, feature3);
- return f;
+void CPUFeatures::Remove(Feature feature) {
+ if (feature != CPUFeatures::kNone) features_.reset(feature);
}
bool CPUFeatures::Has(const CPUFeatures& other) const {
return (features_ & other.features_) == other.features_;
}
-bool CPUFeatures::Has(Feature feature0,
- Feature feature1,
- Feature feature2,
- Feature feature3) const {
- uint64_t mask = MakeFeatureMask(feature0) | MakeFeatureMask(feature1) |
- MakeFeatureMask(feature2) | MakeFeatureMask(feature3);
- return (features_ & mask) == mask;
+bool CPUFeatures::Has(Feature feature) const {
+ return (feature == CPUFeatures::kNone) || features_[feature];
}
-size_t CPUFeatures::Count() const { return CountSetBits(features_); }
+size_t CPUFeatures::Count() const { return features_.count(); }
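
A condensed standalone sketch of the std::bitset-backed storage used here, with an illustrative feature enum; it mirrors the shape of the code above but is not VIXL's CPUFeatures class:

#include <bitset>
#include <cstddef>

enum Feature { kNone = -1, kFP, kNEON, kSVE, kNumberOfFeatures };

class FeatureSetSketch {
 public:
  void Combine(Feature f) {
    if (f != kNone) bits_.set(static_cast<size_t>(f));
  }
  void Remove(Feature f) {
    if (f != kNone) bits_.reset(static_cast<size_t>(f));
  }
  bool Has(Feature f) const {
    // An empty query is trivially satisfied, as in the code above.
    return (f == kNone) || bits_[static_cast<size_t>(f)];
  }
  size_t Count() const { return bits_.count(); }

 private:
  std::bitset<kNumberOfFeatures> bits_;
};
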
std::ostream& operator<<(std::ostream& os, CPUFeatures::Feature feature) {
// clang-format off
@@ -177,12 +108,9 @@ VIXL_CPU_FEATURE_LIST(VIXL_FORMAT_FEATURE)
}
CPUFeatures::const_iterator CPUFeatures::begin() const {
- if (features_ == 0) return const_iterator(this, kNone);
-
- int feature_number = CountTrailingZeros(features_);
- vixl::CPUFeatures::Feature feature =
- static_cast<CPUFeatures::Feature>(feature_number);
- return const_iterator(this, feature);
+ // For iterators in general, it's undefined to increment `end()`, but here we
+ // control the implementation and it is safe to do this.
+ return ++end();
}
CPUFeatures::const_iterator CPUFeatures::end() const {
@@ -190,11 +118,11 @@ CPUFeatures::const_iterator CPUFeatures::end() const {
}
std::ostream& operator<<(std::ostream& os, const CPUFeatures& features) {
- CPUFeatures::const_iterator it = features.begin();
- while (it != features.end()) {
- os << *it;
- ++it;
- if (it != features.end()) os << ", ";
+ bool need_separator = false;
+ for (CPUFeatures::Feature feature : features) {
+ if (need_separator) os << ", ";
+ need_separator = true;
+ os << feature;
}
return os;
}
@@ -205,7 +133,7 @@ bool CPUFeaturesConstIterator::operator==(
return (cpu_features_ == other.cpu_features_) && (feature_ == other.feature_);
}
-CPUFeatures::Feature CPUFeaturesConstIterator::operator++() { // Prefix
+CPUFeaturesConstIterator& CPUFeaturesConstIterator::operator++() { // Prefix
VIXL_ASSERT(IsValid());
do {
// Find the next feature. The order is unspecified.
@@ -219,11 +147,11 @@ CPUFeatures::Feature CPUFeaturesConstIterator::operator++() { // Prefix
// cpu_features_->Has(kNone) is always true, so this will terminate even if
// the features list is empty.
} while (!cpu_features_->Has(feature_));
- return feature_;
+ return *this;
}
-CPUFeatures::Feature CPUFeaturesConstIterator::operator++(int) { // Postfix
- CPUFeatures::Feature result = feature_;
+CPUFeaturesConstIterator CPUFeaturesConstIterator::operator++(int) { // Postfix
+ CPUFeaturesConstIterator result = *this;
++(*this);
return result;
}
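
A sketch of why `begin()` can be written as `++end()`: the end iterator sits on a sentinel position, and prefix increment walks forward to the next set bit, wrapping back to the sentinel when nothing is set, so incrementing end() lands exactly where begin() should. Illustrative code only, not VIXL's iterator:

#include <bitset>
#include <cstddef>

struct SetBitIterator {
  const std::bitset<64>* bits;
  int index;  // -1 is the sentinel position used for end().

  SetBitIterator& operator++() {
    do {
      index++;
      if (index == 64) index = -1;  // Wrap back to the sentinel.
      // The sentinel always counts as a match, so this terminates even when
      // the set is empty.
    } while ((index != -1) && !bits->test(static_cast<size_t>(index)));
    return *this;
  }

  int operator*() const { return index; }
  bool operator==(const SetBitIterator& other) const {
    return (bits == other.bits) && (index == other.index);
  }
  bool operator!=(const SetBitIterator& other) const {
    return !(*this == other);
  }
};

SetBitIterator EndOf(const std::bitset<64>& bits) {
  return SetBitIterator{&bits, -1};
}

SetBitIterator BeginOf(const std::bitset<64>& bits) {
  SetBitIterator it = EndOf(bits);
  return ++it;  // First set bit, or the sentinel again if the set is empty.
}
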
diff --git a/src/cpu-features.h b/src/cpu-features.h
index 50ddc267..1b0f2c24 100644
--- a/src/cpu-features.h
+++ b/src/cpu-features.h
@@ -27,6 +27,7 @@
#ifndef VIXL_CPU_FEATURES_H
#define VIXL_CPU_FEATURES_H
+#include <bitset>
#include <ostream>
#include "globals-vixl.h"
@@ -34,16 +35,65 @@
namespace vixl {
+// VIXL aims to handle and detect all architectural features that are likely to
+// influence code-generation decisions at EL0 (user-space).
+//
+// - There may be multiple VIXL feature flags for a given architectural
+// extension. This occurs where the extension allow components to be
+// implemented independently, or where kernel support is needed, and is likely
+// to be fragmented.
+//
+// For example, Pointer Authentication (kPAuth*) has a separate feature flag
+// for access to PACGA, and to indicate that the QARMA algorithm is
+// implemented.
+//
+// - Conversely, some extensions have configuration options that do not affect
+// EL0, so these are presented as a single VIXL feature.
+//
+// For example, the RAS extension (kRAS) has several variants, but the only
+// feature relevant to VIXL is the addition of the ESB instruction so we only
+// need a single flag.
+//
+// - VIXL offers separate flags for separate features even if they're
+// architecturally linked.
+//
+// For example, the architecture requires kFPHalf and kNEONHalf to be equal,
+// but they have separate hardware ID register fields so VIXL presents them as
+// separate features.
+//
+// - VIXL can detect every feature for which it can generate code.
+//
+// - VIXL can detect some features for which it cannot generate code.
+//
+// The CPUFeatures::Feature enum (derived from the macro list below) is
+// frequently extended. New features may be added to the list at any point, and
+// no assumptions should be made about the numerical values assigned to each
+// enum constant. The symbolic names can be considered to be stable.
+//
+// The debug descriptions are used only for debug output. The 'cpuinfo' strings
+// are informative; VIXL does not use /proc/cpuinfo for feature detection.
+
// clang-format off
#define VIXL_CPU_FEATURE_LIST(V) \
/* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \
/* registers, so that the detailed feature registers can be read */ \
/* directly. */ \
+ \
+ /* Constant name Debug description Linux 'cpuinfo' string. */ \
V(kIDRegisterEmulation, "ID register emulation", "cpuid") \
\
V(kFP, "FP", "fp") \
V(kNEON, "NEON", "asimd") \
V(kCRC32, "CRC32", "crc32") \
+ V(kDGH, "DGH", "dgh") \
+ /* Speculation control features. */ \
+ V(kCSV2, "CSV2", NULL) \
+ V(kSCXTNUM, "SCXTNUM", NULL) \
+ V(kCSV3, "CSV3", NULL) \
+ V(kSB, "SB", "sb") \
+ V(kSPECRES, "SPECRES", NULL) \
+ V(kSSBS, "SSBS", NULL) \
+ V(kSSBSControl, "SSBS (PSTATE control)", "ssbs") \
/* Cryptographic support instructions. */ \
V(kAES, "AES", "aes") \
V(kSHA1, "SHA1", "sha1") \
@@ -58,28 +108,36 @@ namespace vixl {
V(kRDM, "RDM", "asimdrdm") \
/* Scalable Vector Extension. */ \
V(kSVE, "SVE", "sve") \
+ V(kSVEF64MM, "SVE F64MM", "svef64mm") \
+ V(kSVEF32MM, "SVE F32MM", "svef32mm") \
+ V(kSVEI8MM, "SVE I8MM", "svei8mm") \
+ V(kSVEBF16, "SVE BFloat16", "svebf16") \
/* SDOT and UDOT support (in NEON). */ \
V(kDotProduct, "DotProduct", "asimddp") \
+ /* Int8 matrix multiplication (in NEON). */ \
+ V(kI8MM, "NEON I8MM", "i8mm") \
/* Half-precision (FP16) support for FP and NEON, respectively. */ \
V(kFPHalf, "FPHalf", "fphp") \
V(kNEONHalf, "NEONHalf", "asimdhp") \
+ /* BFloat16 support (in both FP and NEON). */ \
+ V(kBF16, "FP/NEON BFloat 16", "bf16") \
/* The RAS extension, including the ESB instruction. */ \
V(kRAS, "RAS", NULL) \
/* Data cache clean to the point of persistence: DC CVAP. */ \
V(kDCPoP, "DCPoP", "dcpop") \
/* Data cache clean to the point of deep persistence: DC CVADP. */ \
- V(kDCCVADP, "DCCVADP", NULL) \
+ V(kDCCVADP, "DCCVADP", "dcpodp") \
/* Cryptographic support instructions. */ \
V(kSHA3, "SHA3", "sha3") \
V(kSHA512, "SHA512", "sha512") \
V(kSM3, "SM3", "sm3") \
V(kSM4, "SM4", "sm4") \
/* Pointer authentication for addresses. */ \
- V(kPAuth, "PAuth", NULL) \
+ V(kPAuth, "PAuth", "paca") \
/* Pointer authentication for addresses uses QARMA. */ \
V(kPAuthQARMA, "PAuthQARMA", NULL) \
/* Generic authentication (using the PACGA instruction). */ \
- V(kPAuthGeneric, "PAuthGeneric", NULL) \
+ V(kPAuthGeneric, "PAuthGeneric", "pacg") \
/* Generic authentication uses QARMA. */ \
V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \
/* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \
@@ -98,13 +156,21 @@ namespace vixl {
/* Data-independent timing (for selected instructions). */ \
V(kDIT, "DIT", "dit") \
/* Branch target identification. */ \
- V(kBTI, "BTI", NULL) \
+ V(kBTI, "BTI", "bti") \
/* Flag manipulation instructions: {AX,XA}FLAG */ \
- V(kAXFlag, "AXFlag", NULL) \
+ V(kAXFlag, "AXFlag", "flagm2") \
/* Random number generation extension, */ \
- V(kRNG, "RNG", NULL) \
+ V(kRNG, "RNG", "rng") \
/* Floating-point round to {32,64}-bit integer. */ \
- V(kFrintToFixedSizedInt,"Frint (bounded)", NULL)
+ V(kFrintToFixedSizedInt,"Frint (bounded)", "frint") \
+ /* Memory Tagging Extension. */ \
+ V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \
+ V(kMTE, "MTE", NULL) \
+ /* PAuth extensions. */ \
+ V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \
+ V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \
+ V(kPAuthFPAC, "PAuth FPAC", NULL) \
+ V(kPAuthFPACCombined, "PAuth FPACCombined", NULL)
// clang-format on
@@ -197,13 +263,13 @@ class CPUFeatures {
// clang-format on
// By default, construct with no features enabled.
- CPUFeatures() : features_(0) {}
+ CPUFeatures() : features_{} {}
// Construct with some features already enabled.
- CPUFeatures(Feature feature0,
- Feature feature1 = kNone,
- Feature feature2 = kNone,
- Feature feature3 = kNone);
+ template <typename T, typename... U>
+ CPUFeatures(T first, U... others) : features_{} {
+ Combine(first, others...);
+ }
// Construct with all features enabled. This can be used to disable feature
// checking: `Has(...)` returns true regardless of the argument.
@@ -236,41 +302,59 @@ class CPUFeatures {
// exist in this set are left unchanged.
void Combine(const CPUFeatures& other);
- // Combine specific features into this set. Features that already exist in
- // this set are left unchanged.
- void Combine(Feature feature0,
- Feature feature1 = kNone,
- Feature feature2 = kNone,
- Feature feature3 = kNone);
+ // Combine a specific feature into this set. If it already exists in the set,
+ // the set is left unchanged.
+ void Combine(Feature feature);
+
+ // Combine multiple features (or feature sets) into this set.
+ template <typename T, typename... U>
+ void Combine(T first, U... others) {
+ Combine(first);
+ Combine(others...);
+ }
// Remove features in another CPUFeatures object from this one.
void Remove(const CPUFeatures& other);
- // Remove specific features from this set.
- void Remove(Feature feature0,
- Feature feature1 = kNone,
- Feature feature2 = kNone,
- Feature feature3 = kNone);
-
- // Chaining helpers for convenient construction.
- CPUFeatures With(const CPUFeatures& other) const;
- CPUFeatures With(Feature feature0,
- Feature feature1 = kNone,
- Feature feature2 = kNone,
- Feature feature3 = kNone) const;
- CPUFeatures Without(const CPUFeatures& other) const;
- CPUFeatures Without(Feature feature0,
- Feature feature1 = kNone,
- Feature feature2 = kNone,
- Feature feature3 = kNone) const;
-
- // Query features.
- // Note that an empty query (like `Has(kNone)`) always returns true.
+ // Remove a specific feature from this set. This has no effect if the feature
+ // doesn't exist in the set.
+ void Remove(Feature feature0);
+
+ // Remove multiple features (or feature sets) from this set.
+ template <typename T, typename... U>
+ void Remove(T first, U... others) {
+ Remove(first);
+ Remove(others...);
+ }
+
+ // Chaining helpers for convenient construction by combining other CPUFeatures
+ // or individual Features.
+ template <typename... T>
+ CPUFeatures With(T... others) const {
+ CPUFeatures f(*this);
+ f.Combine(others...);
+ return f;
+ }
+
+ template <typename... T>
+ CPUFeatures Without(T... others) const {
+ CPUFeatures f(*this);
+ f.Remove(others...);
+ return f;
+ }
+
+ // Test whether the `other` feature set is equal to or a subset of this one.
bool Has(const CPUFeatures& other) const;
- bool Has(Feature feature0,
- Feature feature1 = kNone,
- Feature feature2 = kNone,
- Feature feature3 = kNone) const;
+
+ // Test whether a single feature exists in this set.
+ // Note that `Has(kNone)` always returns true.
+ bool Has(Feature feature) const;
+
+ // Test whether all of the specified features exist in this set.
+ template <typename T, typename... U>
+ bool Has(T first, U... others) const {
+ return Has(first) && Has(others...);
+ }
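
The variadic overloads above follow the usual peel-one-argument-at-a-time pattern: a single-argument overload does the work and the template recurses on the rest, so a call can freely mix individual features and whole sets. A simplified sketch with illustrative names:

#include <bitset>
#include <cstddef>

struct FeatureBagSketch {
  std::bitset<64> bits;

  void Combine(int feature) { bits.set(static_cast<size_t>(feature)); }
  void Combine(const FeatureBagSketch& other) { bits |= other.bits; }

  template <typename T, typename... U>
  void Combine(T first, U... others) {
    Combine(first);
    Combine(others...);
  }

  bool Has(int feature) const { return bits[static_cast<size_t>(feature)]; }
  bool Has(const FeatureBagSketch& other) const {
    return (bits & other.bits) == other.bits;
  }

  template <typename T, typename... U>
  bool Has(T first, U... others) const {
    return Has(first) && Has(others...);
  }
};

// Usage: bag.Combine(3, other_bag, 17); bag.Has(3, 17);
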
// Return the number of enabled features.
size_t Count() const;
@@ -288,9 +372,8 @@ class CPUFeatures {
const_iterator end() const;
private:
- // Each bit represents a feature. This field will be replaced as needed if
- // features are added.
- uint64_t features_;
+ // Each bit represents a feature. This set will be extended as needed.
+ std::bitset<kNumberOfFeatures> features_;
friend std::ostream& operator<<(std::ostream& os,
const vixl::CPUFeatures& features);
@@ -313,8 +396,8 @@ class CPUFeaturesConstIterator {
bool operator!=(const CPUFeaturesConstIterator& other) const {
return !(*this == other);
}
- CPUFeatures::Feature operator++();
- CPUFeatures::Feature operator++(int);
+ CPUFeaturesConstIterator& operator++();
+ CPUFeaturesConstIterator operator++(int);
CPUFeatures::Feature operator*() const {
VIXL_ASSERT(IsValid());
@@ -359,21 +442,17 @@ class CPUFeaturesScope {
// Start a CPUFeaturesScope on any object that implements
// `CPUFeatures* GetCPUFeatures()`.
template <typename T>
- explicit CPUFeaturesScope(T* cpu_features_wrapper,
- CPUFeatures::Feature feature0 = CPUFeatures::kNone,
- CPUFeatures::Feature feature1 = CPUFeatures::kNone,
- CPUFeatures::Feature feature2 = CPUFeatures::kNone,
- CPUFeatures::Feature feature3 = CPUFeatures::kNone)
+ explicit CPUFeaturesScope(T* cpu_features_wrapper)
: cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
- old_features_(*cpu_features_) {
- cpu_features_->Combine(feature0, feature1, feature2, feature3);
- }
+ old_features_(*cpu_features_) {}
- template <typename T>
- CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other)
+ // Start a CPUFeaturesScope on any object that implements
+ // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled.
+ template <typename T, typename U, typename... V>
+ CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features)
: cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
old_features_(*cpu_features_) {
- cpu_features_->Combine(other);
+ cpu_features_->Combine(first, features...);
}
~CPUFeaturesScope() { *cpu_features_ = old_features_; }
diff --git a/src/globals-vixl.h b/src/globals-vixl.h
index 640b4b9b..4dc8c024 100644
--- a/src/globals-vixl.h
+++ b/src/globals-vixl.h
@@ -27,6 +27,10 @@
#ifndef VIXL_GLOBALS_H
#define VIXL_GLOBALS_H
+#if __cplusplus < 201402L
+#error VIXL requires C++14
+#endif
+
// Get standard C99 macros for integer types.
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
@@ -66,7 +70,8 @@ typedef uint8_t byte;
const int KBytes = 1024;
const int MBytes = 1024 * KBytes;
-const int kBitsPerByte = 8;
+const int kBitsPerByteLog2 = 3;
+const int kBitsPerByte = 1 << kBitsPerByteLog2;
template <int SizeInBits>
struct Unsigned;
@@ -223,8 +228,11 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {}
#if __cplusplus >= 201103L
#define VIXL_OVERRIDE override
+#define VIXL_CONSTEXPR constexpr
+#define VIXL_HAS_CONSTEXPR 1
#else
#define VIXL_OVERRIDE
+#define VIXL_CONSTEXPR
#endif
// With VIXL_NEGATIVE_TESTING on, VIXL_ASSERT and VIXL_CHECK will throw
diff --git a/src/invalset-vixl.h b/src/invalset-vixl.h
index fbfb6a01..8bd6035e 100644
--- a/src/invalset-vixl.h
+++ b/src/invalset-vixl.h
@@ -842,9 +842,7 @@ InvalSetIterator<S>::InvalSetIterator(const InvalSetIterator<S>& other)
#if __cplusplus >= 201103L
template <class S>
InvalSetIterator<S>::InvalSetIterator(InvalSetIterator<S>&& other) noexcept
- : using_vector_(false),
- index_(0),
- inval_set_(NULL) {
+ : using_vector_(false), index_(0), inval_set_(NULL) {
swap(*this, other);
}
#endif
diff --git a/src/pool-manager-impl.h b/src/pool-manager-impl.h
index 66ecd6a4..a1bcaaad 100644
--- a/src/pool-manager-impl.h
+++ b/src/pool-manager-impl.h
@@ -264,14 +264,14 @@ bool PoolManager<T>::MustEmit(T pc,
if (checkpoint < temp.min_location_) return true;
}
- bool tempNotPlacedYet = true;
+ bool temp_not_placed_yet = true;
for (int i = static_cast<int>(objects_.size()) - 1; i >= 0; --i) {
const PoolObject<T>& current = objects_[i];
- if (tempNotPlacedYet && PoolObjectLessThan(current, temp)) {
+ if (temp_not_placed_yet && PoolObjectLessThan(current, temp)) {
checkpoint = UpdateCheckpointForObject(checkpoint, &temp);
if (checkpoint < temp.min_location_) return true;
if (CheckFuturePC(pc, checkpoint)) return true;
- tempNotPlacedYet = false;
+ temp_not_placed_yet = false;
}
if (current.label_base_ == label_base) continue;
checkpoint = UpdateCheckpointForObject(checkpoint, &current);
@@ -279,7 +279,7 @@ bool PoolManager<T>::MustEmit(T pc,
if (CheckFuturePC(pc, checkpoint)) return true;
}
// temp is the object with the smallest max_location_.
- if (tempNotPlacedYet) {
+ if (temp_not_placed_yet) {
checkpoint = UpdateCheckpointForObject(checkpoint, &temp);
if (checkpoint < temp.min_location_) return true;
}
@@ -497,7 +497,7 @@ PoolManager<T>::~PoolManager<T>() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
}
#endif
// Delete objects the pool manager owns.
- for (typename std::vector<LocationBase<T> *>::iterator
+ for (typename std::vector<LocationBase<T>*>::iterator
iter = delete_on_destruction_.begin(),
end = delete_on_destruction_.end();
iter != end;
diff --git a/src/utils-vixl.h b/src/utils-vixl.h
index c9287e40..0ae6dfc0 100644
--- a/src/utils-vixl.h
+++ b/src/utils-vixl.h
@@ -67,7 +67,7 @@ namespace vixl {
#endif
template <typename T, size_t n>
-size_t ArrayLength(const T (&)[n]) {
+constexpr size_t ArrayLength(const T (&)[n]) {
return n;
}
@@ -77,25 +77,30 @@ inline uint64_t GetUintMask(unsigned bits) {
return base - 1;
}
+inline uint64_t GetSignMask(unsigned bits) {
+ VIXL_ASSERT(bits <= 64);
+ return UINT64_C(1) << (bits - 1);
+}
+
// Check number width.
// TODO: Refactor these using templates.
inline bool IsIntN(unsigned n, uint32_t x) {
- VIXL_ASSERT((0 < n) && (n < 32));
- uint32_t limit = UINT32_C(1) << (n - 1);
- return x < limit;
+ VIXL_ASSERT((0 < n) && (n <= 32));
+ return x <= static_cast<uint32_t>(INT32_MAX >> (32 - n));
}
inline bool IsIntN(unsigned n, int32_t x) {
- VIXL_ASSERT((0 < n) && (n < 32));
+ VIXL_ASSERT((0 < n) && (n <= 32));
+ if (n == 32) return true;
int32_t limit = INT32_C(1) << (n - 1);
return (-limit <= x) && (x < limit);
}
inline bool IsIntN(unsigned n, uint64_t x) {
- VIXL_ASSERT((0 < n) && (n < 64));
- uint64_t limit = UINT64_C(1) << (n - 1);
- return x < limit;
+ VIXL_ASSERT((0 < n) && (n <= 64));
+ return x <= static_cast<uint64_t>(INT64_MAX >> (64 - n));
}
inline bool IsIntN(unsigned n, int64_t x) {
- VIXL_ASSERT((0 < n) && (n < 64));
+ VIXL_ASSERT((0 < n) && (n <= 64));
+ if (n == 64) return true;
int64_t limit = INT64_C(1) << (n - 1);
return (-limit <= x) && (x < limit);
}
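
The widened assertions let `n` equal the full width of the type, in which case every value fits trivially. A free-standing sketch mirroring that logic, with a few example results:

#include <cassert>
#include <cstdint>

bool IsIntNSketch(unsigned n, int64_t x) {
  assert((0 < n) && (n <= 64));
  if (n == 64) return true;
  int64_t limit = INT64_C(1) << (n - 1);
  return (-limit <= x) && (x < limit);
}

bool IsUintNSketch(unsigned n, uint64_t x) {
  assert((0 < n) && (n <= 64));
  if (n >= 64) return true;
  return (x >> n) == 0;
}

int main() {
  assert(IsIntNSketch(8, 127) && !IsIntNSketch(8, 128));
  assert(IsIntNSketch(64, INT64_MIN));  // n == width: always representable.
  assert(IsUintNSketch(8, 255) && !IsUintNSketch(8, 256));
  return 0;
}
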
@@ -104,7 +109,8 @@ VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) {
}
inline bool IsUintN(unsigned n, uint32_t x) {
- VIXL_ASSERT((0 < n) && (n < 32));
+ VIXL_ASSERT((0 < n) && (n <= 32));
+ if (n >= 32) return true;
return !(x >> n);
}
inline bool IsUintN(unsigned n, int32_t x) {
@@ -113,7 +119,8 @@ inline bool IsUintN(unsigned n, int32_t x) {
return !(static_cast<uint32_t>(x) >> n);
}
inline bool IsUintN(unsigned n, uint64_t x) {
- VIXL_ASSERT((0 < n) && (n < 64));
+ VIXL_ASSERT((0 < n) && (n <= 64));
+ if (n >= 64) return true;
return !(x >> n);
}
inline bool IsUintN(unsigned n, int64_t x) {
@@ -189,7 +196,7 @@ inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) {
}
-inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint32_t x) {
+inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint64_t x) {
VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
(msb >= lsb));
return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x));
@@ -209,8 +216,7 @@ inline int64_t ExtractSignedBitfield64(int msb, int lsb, uint64_t x) {
return result;
}
-
-inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint32_t x) {
+inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint64_t x) {
VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
(msb >= lsb));
uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x));
@@ -219,7 +225,6 @@ inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint32_t x) {
return result;
}
-
inline uint64_t RotateRight(uint64_t value,
unsigned int rotate,
unsigned int width) {
@@ -277,6 +282,19 @@ VIXL_DEPRECATED("RawbitsToDouble",
return RawbitsToDouble(bits);
}
+// Convert unsigned to signed numbers in a well-defined way (using two's
+// complement representations).
+inline int64_t RawbitsToInt64(uint64_t bits) {
+ return (bits >= UINT64_C(0x8000000000000000))
+ ? (-static_cast<int64_t>(-bits - 1) - 1)
+ : static_cast<int64_t>(bits);
+}
+
+inline int32_t RawbitsToInt32(uint32_t bits) {
+ return (bits >= UINT64_C(0x80000000)) ? (-static_cast<int32_t>(-bits - 1) - 1)
+ : static_cast<int32_t>(bits);
+}
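
A quick self-contained check of the conversion above, using a local copy of the same expression so the snippet compiles on its own:

#include <cassert>
#include <cstdint>

int64_t RawbitsToInt64Sketch(uint64_t bits) {
  return (bits >= UINT64_C(0x8000000000000000))
             ? (-static_cast<int64_t>(-bits - 1) - 1)
             : static_cast<int64_t>(bits);
}

int main() {
  assert(RawbitsToInt64Sketch(UINT64_C(0xffffffffffffffff)) == INT64_C(-1));
  assert(RawbitsToInt64Sketch(UINT64_C(0x8000000000000000)) == INT64_MIN);
  assert(RawbitsToInt64Sketch(UINT64_C(42)) == INT64_C(42));
  return 0;
}
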
+
namespace internal {
// Internal simulation class used solely by the simulator to
@@ -371,6 +389,10 @@ VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {
bool IsZero(Float16 value);
+inline bool IsPositiveZero(double value) {
+ return (value == 0.0) && (copysign(1.0, value) > 0.0);
+}
+
inline bool IsNaN(float value) { return std::isnan(value); }
inline bool IsNaN(double value) { return std::isnan(value); }
@@ -490,11 +512,11 @@ T ReverseBits(T value) {
template <typename T>
-inline T SignExtend(T val, int bitSize) {
- VIXL_ASSERT(bitSize > 0);
- T mask = (T(2) << (bitSize - 1)) - T(1);
+inline T SignExtend(T val, int size_in_bits) {
+ VIXL_ASSERT(size_in_bits > 0);
+ T mask = (T(2) << (size_in_bits - 1)) - T(1);
val &= mask;
- T sign_bits = -((val >> (bitSize - 1)) << bitSize);
+ T sign_bits = -((val >> (size_in_bits - 1)) << size_in_bits);
val |= sign_bits;
return val;
}
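
A short usage check of the sign-extension helper, again with a local copy (specialised to int64_t) so the snippet stands alone:

#include <cassert>
#include <cstdint>

int64_t SignExtendSketch(int64_t val, int size_in_bits) {
  int64_t mask = (INT64_C(2) << (size_in_bits - 1)) - 1;
  val &= mask;
  int64_t sign_bits = -((val >> (size_in_bits - 1)) << size_in_bits);
  return val | sign_bits;
}

int main() {
  assert(SignExtendSketch(0xff, 8) == -1);    // 0xff as an 8-bit value is -1.
  assert(SignExtendSketch(0x7f, 8) == 0x7f);  // Positive values are unchanged.
  assert(SignExtendSketch(0x80, 8) == -128);
  return 0;
}
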
@@ -576,7 +598,7 @@ T AlignUp(T pointer,
// reinterpret_cast behaviour for other types.
typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
- (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+ (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer;
VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
size_t mask = alignment - 1;
@@ -596,7 +618,7 @@ T AlignDown(T pointer,
// reinterpret_cast behaviour for other types.
typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
- (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+ (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer;
VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
size_t mask = alignment - 1;
@@ -980,6 +1002,42 @@ Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {}
Int64 BitCount(Uint32 value);
+// The algorithm used is adapted from the one described in section 8.2 of
+// Hacker's Delight, by Henry S. Warren, Jr.
+template <unsigned N, typename T>
+int64_t MultiplyHigh(T u, T v) {
+ uint64_t u0, v0, w0, u1, v1, w1, w2, t;
+ VIXL_STATIC_ASSERT((N == 8) || (N == 16) || (N == 32) || (N == 64));
+ uint64_t sign_mask = UINT64_C(1) << (N - 1);
+ uint64_t sign_ext = 0;
+ unsigned half_bits = N / 2;
+ uint64_t half_mask = GetUintMask(half_bits);
+ if (std::numeric_limits<T>::is_signed) {
+ sign_ext = UINT64_C(0xffffffffffffffff) << half_bits;
+ }
+
+ VIXL_ASSERT(sizeof(u) == sizeof(uint64_t));
+ VIXL_ASSERT(sizeof(u) == sizeof(u0));
+
+ u0 = u & half_mask;
+ u1 = u >> half_bits | (((u & sign_mask) != 0) ? sign_ext : 0);
+ v0 = v & half_mask;
+ v1 = v >> half_bits | (((v & sign_mask) != 0) ? sign_ext : 0);
+
+ w0 = u0 * v0;
+ t = u1 * v0 + (w0 >> half_bits);
+
+ w1 = t & half_mask;
+ w2 = t >> half_bits | (((t & sign_mask) != 0) ? sign_ext : 0);
+ w1 = u0 * v1 + w1;
+ w1 = w1 >> half_bits | (((w1 & sign_mask) != 0) ? sign_ext : 0);
+
+ uint64_t value = u1 * v1 + w2 + w1;
+ int64_t result;
+ memcpy(&result, &value, sizeof(result));
+ return result;
+}
+
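
For the 32-bit signed case the full product fits in 64 bits, so the high half can be computed directly; the sketch below is a convenient cross-check for the half-word algorithm above (plain arithmetic, not VIXL code):

#include <cassert>
#include <cstdint>

int32_t MulHigh32(int32_t a, int32_t b) {
  int64_t product = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  uint64_t raw = static_cast<uint64_t>(product);  // Two's complement bits.
  uint32_t high = static_cast<uint32_t>(raw >> 32);
  // Reinterpret the top 32 bits as signed without relying on
  // implementation-defined shifts of negative values.
  return (high >= UINT32_C(0x80000000))
             ? static_cast<int32_t>(high - UINT32_C(0x80000000)) + INT32_MIN
             : static_cast<int32_t>(high);
}

int main() {
  assert(MulHigh32(0x40000000, 4) == 1);           // 2^30 * 4 == 2^32.
  assert(MulHigh32(-1, 1) == -1);                  // High half of -1 is all ones.
  assert(MulHigh32(0x12345678, 0x1000) == 0x123);  // Equivalent to 0x12345678 >> 20.
  return 0;
}
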
} // namespace internal
// The default NaN values (for FPCR.DN=1).
@@ -1244,9 +1302,8 @@ inline Float16 FPRoundToFloat16(int64_t sign,
uint64_t mantissa,
FPRounding round_mode) {
return RawbitsToFloat16(
- FPRound<uint16_t,
- kFloat16ExponentBits,
- kFloat16MantissaBits>(sign, exponent, mantissa, round_mode));
+ FPRound<uint16_t, kFloat16ExponentBits, kFloat16MantissaBits>(
+ sign, exponent, mantissa, round_mode));
}
@@ -1282,6 +1339,62 @@ Float16 FPToFloat16(double value,
FPRounding round_mode,
UseDefaultNaN DN,
bool* exception = NULL);
+
+// Like static_cast<T>(value), but with specialisations for the Float16 type.
+template <typename T, typename F>
+T StaticCastFPTo(F value) {
+ return static_cast<T>(value);
+}
+
+template <>
+inline float StaticCastFPTo<float, Float16>(Float16 value) {
+ return FPToFloat(value, kIgnoreDefaultNaN);
+}
+
+template <>
+inline double StaticCastFPTo<double, Float16>(Float16 value) {
+ return FPToDouble(value, kIgnoreDefaultNaN);
+}
+
+template <>
+inline Float16 StaticCastFPTo<Float16, float>(float value) {
+ return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
+}
+
+template <>
+inline Float16 StaticCastFPTo<Float16, double>(double value) {
+ return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
+}
+
+template <typename T>
+uint64_t FPToRawbitsWithSize(unsigned size_in_bits, T value) {
+ switch (size_in_bits) {
+ case 16:
+ return Float16ToRawbits(StaticCastFPTo<Float16>(value));
+ case 32:
+ return FloatToRawbits(StaticCastFPTo<float>(value));
+ case 64:
+ return DoubleToRawbits(StaticCastFPTo<double>(value));
+ }
+ VIXL_UNREACHABLE();
+ return 0;
+}
+
+template <typename T>
+T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) {
+ VIXL_ASSERT(IsUintN(size_in_bits, value));
+ switch (size_in_bits) {
+ case 16:
+ return StaticCastFPTo<T>(RawbitsToFloat16(static_cast<uint16_t>(value)));
+ case 32:
+ return StaticCastFPTo<T>(RawbitsToFloat(static_cast<uint32_t>(value)));
+ case 64:
+ return StaticCastFPTo<T>(RawbitsToDouble(value));
+ }
+ VIXL_UNREACHABLE();
+ return 0;
+}
+
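
The same size-dispatch idea, restricted to float and double and using std::memcpy as the portable way to reinterpret bits (a Float16 case would additionally need the conversions above); illustrative only:

#include <cstdint>
#include <cstring>

uint64_t ToRawbitsSketch(unsigned size_in_bits, double value) {
  if (size_in_bits == 32) {
    float f = static_cast<float>(value);
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    return bits;
  }
  uint64_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;
}

double FromRawbitsSketch(unsigned size_in_bits, uint64_t raw) {
  if (size_in_bits == 32) {
    uint32_t bits = static_cast<uint32_t>(raw);
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
  }
  double d;
  std::memcpy(&d, &raw, sizeof(d));
  return d;
}
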
} // namespace vixl
#endif // VIXL_UTILS_H