author | Artem Serov <artem.serov@linaro.org> | 2020-11-09 15:26:22 +0000
---|---|---
committer | Artem Serov <artem.serov@linaro.org> | 2020-11-10 15:33:15 +0000
commit | 5a229a9923d9dab968d7fe186ffa47ac52f9e065 (patch) |
tree | ca689c0233a213244a288044dbb6cfc86d99be60 /src |
parent | aa1d76b1824ec9bcf66af54fbdc9d137a3c398d5 (diff) |
download | vixl-5a229a9923d9dab968d7fe186ffa47ac52f9e065.tar.gz |
Revert^2 "Merge remote-tracking branch 'aosp/upstream-master' into..."
This reverts commit 0a75ba66aa15ea1cdb3f57d0efd4ce7e7c14d45f.
Test: mma test-art-host-vixl
Test: test.py --host --optimizing --jit --gtest
Test: test.py --target --optimizing --jit
Test: run-gtests.sh
Change-Id: I052ab4d3243b0b9bee4c52d00ba4ef1d93a8d32c
Diffstat (limited to 'src')
36 files changed, 35236 insertions, 4027 deletions
diff --git a/src/aarch32/disasm-aarch32.cc b/src/aarch32/disasm-aarch32.cc
index 9ed3a831..535f60c8 100644
--- a/src/aarch32/disasm-aarch32.cc
+++ b/src/aarch32/disasm-aarch32.cc
@@ -8288,13 +8288,13 @@ void Disassembler::DecodeT32(uint32_t instr) {
           UnallocatedT32(instr);
           return;
         }
-        unsigned firstcond = (instr >> 20) & 0xf;
+        unsigned first_cond = (instr >> 20) & 0xf;
         unsigned mask = (instr >> 16) & 0xf;
-        bool wasInITBlock = InITBlock();
-        SetIT(Condition(firstcond), mask);
-        it(Condition(firstcond), mask);
-        if (wasInITBlock || (firstcond == 15) ||
-            ((firstcond == al) &&
+        bool was_in_it_block = InITBlock();
+        SetIT(Condition(first_cond), mask);
+        it(Condition(first_cond), mask);
+        if (was_in_it_block || (first_cond == 15) ||
+            ((first_cond == al) &&
              (BitCount(Uint32(mask)) != 1))) {
           UnpredictableT32(instr);
         }
diff --git a/src/aarch32/macro-assembler-aarch32.h b/src/aarch32/macro-assembler-aarch32.h
index d0ff52b3..6d76642f 100644
--- a/src/aarch32/macro-assembler-aarch32.h
+++ b/src/aarch32/macro-assembler-aarch32.h
@@ -268,7 +268,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #else
     USE(allow_macro_instructions_);
 #endif
@@ -283,7 +284,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #endif
   }
   MacroAssembler(byte* buffer, size_t size, InstructionSet isa = kDefaultISA)
@@ -296,7 +298,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #endif
   }
diff --git a/src/aarch32/operands-aarch32.h b/src/aarch32/operands-aarch32.h
index 1d18bfd3..2b452958 100644
--- a/src/aarch32/operands-aarch32.h
+++ b/src/aarch32/operands-aarch32.h
@@ -54,28 +54,16 @@ class Operand {
   // This is allowed to be an implicit constructor because Operand is
   // a wrapper class that doesn't normally perform any type conversion.
   Operand(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoReg),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {}
+      : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {}
   Operand(int32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoReg),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {}
+      : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {}

   // rm
   // where rm is the base register
   // This is allowed to be an implicit constructor because Operand is
   // a wrapper class that doesn't normally perform any type conversion.
   Operand(Register rm)  // NOLINT(runtime/explicit)
-      : imm_(0),
-        rm_(rm),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {
+      : imm_(0), rm_(rm), shift_(LSL), amount_(0), rs_(NoReg) {
     VIXL_ASSERT(rm_.IsValid());
   }
@@ -245,22 +233,18 @@ class NeonImmediate {
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I32) {}
+      : imm_(immediate), immediate_type_(I32) {}
   NeonImmediate(int immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I32) {}
+      : imm_(immediate), immediate_type_(I32) {}

   // { #<immediate> }
   // where <immediate> is a 64 bit number
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(int64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I64) {}
+      : imm_(immediate), immediate_type_(I64) {}
   NeonImmediate(uint64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I64) {}
+      : imm_(immediate), immediate_type_(I64) {}

   // { #<immediate> }
   // where <immediate> is a non zero floating point number which can be encoded
@@ -268,11 +252,9 @@ class NeonImmediate {
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(float immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(F32) {}
+      : imm_(immediate), immediate_type_(F32) {}
   NeonImmediate(double immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(F64) {}
+      : imm_(immediate), immediate_type_(F64) {}

   NeonImmediate(const NeonImmediate& src)
       : imm_(src.imm_), immediate_type_(src.immediate_type_) {}
@@ -374,29 +356,21 @@ std::ostream& operator<<(std::ostream& os, const NeonImmediate& operand);
 class NeonOperand {
  public:
   NeonOperand(int32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(int64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(uint64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(float immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(double immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(const NeonImmediate& imm)  // NOLINT(runtime/explicit)
-      : imm_(imm),
-        rm_(NoDReg) {}
+      : imm_(imm), rm_(NoDReg) {}
   NeonOperand(const VRegister& rm)  // NOLINT(runtime/explicit)
-      : imm_(0),
-        rm_(rm) {
+      : imm_(0), rm_(rm) {
     VIXL_ASSERT(rm_.IsValid());
   }
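The NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) annotations added to macro-assembler-aarch32.h above suppress a Clang Static Analyzer check that fires when a constructor calls a virtual function. A minimal, self-contained illustration of the behaviour that check guards against; the class names here are invented for the sketch and are not VIXL's:

#include <iostream>

class Base {
 public:
  Base() {
    // During Base's constructor the dynamic type is still Base, so this
    // call resolves to Base::Configure() and never to an override. This is
    // exactly what clang-analyzer-optin.cplusplus.VirtualCall flags.
    Configure();
  }
  virtual ~Base() = default;
  virtual void Configure() { std::cout << "Base::Configure\n"; }
};

class Derived : public Base {
 public:
  void Configure() override { std::cout << "Derived::Configure\n"; }
};

int main() {
  Derived d;  // Prints "Base::Configure", not "Derived::Configure".
  return 0;
}

In the MacroAssembler constructors the static resolution appears to be intentional, which is presumably why the commit suppresses the warning rather than restructuring the constructors.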
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 9e73ffaa..e98de89b 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -1044,7 +1044,7 @@ void Assembler::cls(const Register& rd, const Register& rn) {
   V(auti, AUTI) \
   V(autd, AUTD)

-#define DEFINE_ASM_FUNCS(PRE, OP) \
+#define VIXL_DEFINE_ASM_FUNC(PRE, OP) \
   void Assembler::PRE##a(const Register& xd, const Register& xn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \
     VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits()); \
@@ -1069,8 +1069,8 @@ void Assembler::cls(const Register& rd, const Register& rn) {
     Emit(SF(xd) | OP##ZB | Rd(xd)); \
   }

-PAUTH_VARIATIONS(DEFINE_ASM_FUNCS)
-#undef DEFINE_ASM_FUNCS
+PAUTH_VARIATIONS(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::pacga(const Register& xd,
                       const Register& xn,
@@ -1141,7 +1141,13 @@ void Assembler::LoadStorePair(const CPURegister& rt,
       addrmodeop = LoadStorePairPostIndexFixed;
     }
   }
-  Emit(addrmodeop | memop);
+
+  Instr emitop = addrmodeop | memop;
+
+  // Only X registers may be specified for ldpsw.
+  VIXL_ASSERT(((emitop & LoadStorePairMask) != LDPSW_x) || rt.IsX());
+
+  Emit(emitop);
 }
@@ -1381,10 +1387,16 @@ void Assembler::ldr(const CPURegister& rt, int64_t imm19) {
 }

-void Assembler::prfm(PrefetchOperation op, int64_t imm19) {
+void Assembler::prfm(int op, int64_t imm19) {
   Emit(PRFM_lit | ImmPrefetchOperation(op) | ImmLLiteral(imm19));
 }

+void Assembler::prfm(PrefetchOperation op, int64_t imm19) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  prfm(static_cast<int>(op), imm19);
+}
+
 // Exclusive-access instructions.
 void Assembler::stxrb(const Register& rs,
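The prfm pair above is a pattern this commit applies to every prefetch entry point: a typed overload asserts that the PrefetchOperation is a named enumerator and forwards to an int overload that performs the encoding, so unallocated hint values never travel through the enum type. A rough standalone sketch of the idea; the enumerator values follow the AArch64 prfm hint encodings, but the function bodies are illustrative rather than VIXL's:

#include <cassert>

// Illustrative stand-ins for VIXL's PrefetchOperation and its helper.
enum PrefetchOperation { PLDL1KEEP = 0, PLDL1STRM = 1, PSTL3STRM = 21 };

bool IsNamedPrefetchOperation(int op) {
  return (op == PLDL1KEEP) || (op == PLDL1STRM) || (op == PSTL3STRM);
}

// The int overload does the real work. The hint field is five bits wide,
// so any value in [0, 32) is encodable, including unallocated hints that
// have no named enumerator.
void prfm(int op) {
  assert((op >= 0) && (op < 32));
  // ... encode `op` into the instruction's hint field ...
}

// The typed overload accepts only named values, so an out-of-range integer
// is never converted into the enum type (undefined behaviour in C++).
void prfm(PrefetchOperation op) {
  assert(IsNamedPrefetchOperation(op));
  prfm(static_cast<int>(op));
}

int main() {
  prfm(PLDL1KEEP);  // A named hint goes through the typed overload.
  prfm(0x18);       // An unallocated hint uses the int overload directly.
  return 0;
}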
@@ -1635,17 +1647,18 @@ void Assembler::ldlar(const Register& rt, const MemOperand& src) {
   V(casal, CASAL)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const Register& rs, \
                      const Register& rt, \
                      const MemOperand& src) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
     VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \
+    VIXL_ASSERT(AreSameFormat(rs, rt)); \
     LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \
     Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \
   }
-COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+COMPARE_AND_SWAP_W_X_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
 #define COMPARE_AND_SWAP_W_LIST(V) \
@@ -1659,7 +1672,7 @@ COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
   V(casalh, CASALH)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const Register& rs, \
                      const Register& rt, \
                      const MemOperand& src) { \
@@ -1667,8 +1680,8 @@ COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
     VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \
     Emit(OP | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \
   }
-COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+COMPARE_AND_SWAP_W_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -1679,7 +1692,7 @@ COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
   V(caspal, CASPAL)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const Register& rs, \
                      const Register& rs1, \
                      const Register& rt, \
@@ -1691,11 +1704,12 @@ COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
     VIXL_ASSERT(AreEven(rs, rt)); \
     VIXL_ASSERT(AreConsecutive(rs, rs1)); \
     VIXL_ASSERT(AreConsecutive(rt, rt1)); \
+    VIXL_ASSERT(AreSameFormat(rs, rs1, rt, rt1)); \
     LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \
     Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \
   }
-COMPARE_AND_SWAP_PAIR_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+COMPARE_AND_SWAP_PAIR_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // These macros generate all the variations of the atomic memory operations,
 // e.g. ldadd, ldadda, ldaddb, staddl, etc.
@@ -1846,7 +1860,7 @@ void Assembler::ldapursw(const Register& rt, const MemOperand& src) {
   Emit(LDAPURSW | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
 }

-void Assembler::prfm(PrefetchOperation op,
+void Assembler::prfm(int op,
                      const MemOperand& address,
                      LoadStoreScalingOption option) {
   VIXL_ASSERT(option != RequireUnscaledOffset);
@@ -1854,8 +1868,16 @@ void Assembler::prfm(PrefetchOperation op,
   Prefetch(op, address, option);
 }

+void Assembler::prfm(PrefetchOperation op,
+                     const MemOperand& address,
+                     LoadStoreScalingOption option) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  prfm(static_cast<int>(op), address, option);
+}

-void Assembler::prfum(PrefetchOperation op,
+
+void Assembler::prfum(int op,
                       const MemOperand& address,
                       LoadStoreScalingOption option) {
   VIXL_ASSERT(option != RequireScaledOffset);
@@ -1863,11 +1885,25 @@ void Assembler::prfum(PrefetchOperation op,
   Prefetch(op, address, option);
 }

+void Assembler::prfum(PrefetchOperation op,
+                      const MemOperand& address,
+                      LoadStoreScalingOption option) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  prfum(static_cast<int>(op), address, option);
+}

-void Assembler::prfm(PrefetchOperation op, RawLiteral* literal) {
+
+void Assembler::prfm(int op, RawLiteral* literal) {
   prfm(op, static_cast<int>(LinkAndGetWordOffsetTo(literal)));
 }

+void Assembler::prfm(PrefetchOperation op, RawLiteral* literal) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  prfm(static_cast<int>(op), literal);
+}
+
 void Assembler::sys(int op1, int crn, int crm, int op2, const Register& xt) {
   VIXL_ASSERT(xt.Is64Bits());
@@ -1933,6 +1969,7 @@ void Assembler::LoadStoreStructVerify(const VRegister& vt,
   // Assert that addressing mode is either offset (with immediate 0), post
   // index by immediate of the size of the register list, or post index by a
   // value in a core register.
+  VIXL_ASSERT(vt.HasSize() && vt.HasLaneSize());
   if (addr.IsImmediateOffset()) {
     VIXL_ASSERT(addr.GetOffset() == 0);
   } else {
@@ -2290,6 +2327,7 @@ void Assembler::LoadStoreStructSingle(const VRegister& vt,
   // We support vt arguments of the form vt.VxT() or vt.T(), where x is the
   // number of lanes, and T is b, h, s or d.
   unsigned lane_size = vt.GetLaneSizeInBytes();
+  VIXL_ASSERT(lane_size > 0);
   VIXL_ASSERT(lane < (kQRegSizeInBytes / lane_size));

   // Lane size is encoded in the opcode field. Lane index is encoded in the Q,
@@ -2424,7 +2462,7 @@ void Assembler::NEON3DifferentHN(const VRegister& vd,
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm) { \
@@ -2432,8 +2470,8 @@ void Assembler::NEON3DifferentHN(const VRegister& vd,
     VIXL_ASSERT(AS); \
     NEON3DifferentL(vd, vn, vm, OP); \
   }
-NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_3DIFF_LONG_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
 #define NEON_3DIFF_HN_LIST(V) \
@@ -2447,7 +2485,7 @@ NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
   V(rsubhn2, NEON_RSUBHN2, vd.IsQ())
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm) { \
@@ -2455,8 +2493,8 @@ NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
     VIXL_ASSERT(AS); \
     NEON3DifferentHN(vd, vn, vm, OP); \
   }
-NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_3DIFF_HN_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::uaddw(const VRegister& vd,
                       const VRegister& vn,
@@ -3104,7 +3142,7 @@ void Assembler::NEONFP16ConvertToInt(const VRegister& vd,
   V(fcvtau, NEON_FCVTAU, FCVTAU) \
   V(fcvtas, NEON_FCVTAS, FCVTAS)

-#define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
   void Assembler::FN(const Register& rd, const VRegister& vn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \
     if (vn.IsH()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); \
@@ -3119,8 +3157,8 @@ void Assembler::NEONFP16ConvertToInt(const VRegister& vd,
       NEONFPConvertToInt(vd, vn, VEC_OP); \
     } \
   }
-NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS)
-#undef DEFINE_ASM_FUNCS
+NEON_FP2REGMISC_FCVT_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::fcvtzs(const Register& rd, const VRegister& vn, int fbits) {
@@ -3308,7 +3346,7 @@ void Assembler::NEON3SameFP16(const VRegister& vd,
   V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar, NEON_FRECPE_H_scalar)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
   void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \
     Instr op; \
@@ -3348,8 +3386,8 @@ void Assembler::NEON3SameFP16(const VRegister& vd,
       NEONFP2RegMisc(vd, vn, op); \
     } \
   }
-NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FP2REGMISC_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
 #define NEON_FP2REGMISC_V85_LIST(V) \
@@ -3359,7 +3397,7 @@ NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
   V(frint64z, NEON_FRINT64Z, FRINT64Z)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
   void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kFrintToFixedSizedInt)); \
     Instr op; \
@@ -3373,8 +3411,8 @@ NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
     } \
     NEONFP2RegMisc(vd, vn, op); \
   }
-NEON_FP2REGMISC_V85_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FP2REGMISC_V85_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::NEONFP2RegMiscFP16(const VRegister& vd,
                                    const VRegister& vn,
@@ -3638,7 +3676,7 @@ void Assembler::frecpx(const VRegister& vd, const VRegister& vn) {
   V(uqrshl, NEON_UQRSHL, true)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm) { \
@@ -3646,8 +3684,8 @@ void Assembler::frecpx(const VRegister& vd, const VRegister& vn) {
     VIXL_ASSERT(AS); \
     NEON3Same(vd, vn, vm, OP); \
   }
-NEON_3SAME_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_3SAME_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
 #define NEON_FP3SAME_OP_LIST(V) \
@@ -3680,7 +3718,7 @@ NEON_3SAME_LIST(DEFINE_ASM_FUNC)
 // TODO: This macro is complicated because it classifies the instructions in the
 // macro list above, and treats each case differently. It could be somewhat
 // simpler if we were to split the macro, at the cost of some duplication.
-#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm) { \
@@ -3720,8 +3758,8 @@ NEON_3SAME_LIST(DEFINE_ASM_FUNC)
       NEONFP3Same(vd, vn, vm, op); \
     } \
   }
-NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FP3SAME_OP_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -3732,7 +3770,7 @@ NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
   V(fmlsl2, NEON_FMLSL2)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, VEC_OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm) { \
@@ -3744,8 +3782,8 @@ NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
                 (vd.Is4S() && vn.Is4H() && vm.Is4H())); \
     Emit(FPFormat(vd) | VEC_OP | Rm(vm) | Rn(vn) | Rd(vd)); \
   }
-NEON_FHM_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FHM_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::addp(const VRegister& vd, const VRegister& vn) {
@@ -4138,7 +4176,7 @@ void Assembler::udot(const VRegister& vd,
   V(sqrdmulh, NEON_SQRDMULH_byelement, true) \
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm, \
@@ -4147,8 +4185,8 @@ void Assembler::udot(const VRegister& vd,
     VIXL_ASSERT(AS); \
     NEONByElement(vd, vn, vm, vm_index, OP); \
   }
-NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -4157,7 +4195,7 @@ NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
   V(sqrdmlsh, NEON_SQRDMLSH_byelement)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm, \
@@ -4165,8 +4203,8 @@ NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
     VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kRDM)); \
     NEONByElement(vd, vn, vm, vm_index, OP); \
   }
-NEON_BYELEMENT_RDM_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_RDM_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -4177,7 +4215,7 @@ NEON_BYELEMENT_RDM_LIST(DEFINE_ASM_FUNC)
   V(fmulx, NEON_FMULX_byelement, NEON_FMULX_H_byelement)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, OP_H) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm, \
@@ -4186,8 +4224,8 @@ NEON_BYELEMENT_RDM_LIST(DEFINE_ASM_FUNC)
     if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
     NEONFPByElement(vd, vn, vm, vm_index, OP, OP_H); \
   }
-NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FPBYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -4213,7 +4251,7 @@ NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm, \
@@ -4222,8 +4260,8 @@ NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
     VIXL_ASSERT(AS); \
     NEONByElementL(vd, vn, vm, vm_index, OP); \
   }
-NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_LONG_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -4235,7 +4273,7 @@ NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm, \
@@ -4252,8 +4290,8 @@ NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
     Emit(FPFormat(vd) | OP | Rd(vd) | Rn(vn) | Rm(vm) | \
          ImmNEONHLM(vm_index, 3)); \
   }
-NEON_BYELEMENT_FHM_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_FHM_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::suqadd(const VRegister& vd, const VRegister& vn) {
   VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
@@ -4763,13 +4801,13 @@ void Assembler::NEONAcrossLanes(const VRegister& vd,
   V(uminv, NEON_UMINV)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
     NEONAcrossLanes(vd, vn, OP, 0); \
   }
-NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_ACROSSLANES_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -4780,15 +4818,15 @@ NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
   V(fminnmv, NEON_FMINNMV, NEON_FMINNMV_H) \
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, OP_H) \
   void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); \
     if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
     VIXL_ASSERT(vd.Is1S() || vd.Is1H()); \
     NEONAcrossLanes(vd, vn, OP, OP_H); \
   }
-NEON_ACROSSLANES_FP_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_ACROSSLANES_FP_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::NEONPerm(const VRegister& vd,
@@ -4870,9 +4908,9 @@ void Assembler::NEONShiftLeftImmediate(const VRegister& vd,
                                        const VRegister& vn,
                                        int shift,
                                        NEONShiftImmediateOp op) {
-  int laneSizeInBits = vn.GetLaneSizeInBits();
-  VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits));
-  NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16);
+  int lane_size_in_bits = vn.GetLaneSizeInBits();
+  VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits));
+  NEONShiftImmediate(vd, vn, op, (lane_size_in_bits + shift) << 16);
 }

@@ -4880,9 +4918,9 @@ void Assembler::NEONShiftRightImmediate(const VRegister& vd,
                                         const VRegister& vn,
                                         int shift,
                                         NEONShiftImmediateOp op) {
-  int laneSizeInBits = vn.GetLaneSizeInBits();
-  VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits));
-  NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16);
+  int lane_size_in_bits = vn.GetLaneSizeInBits();
+  VIXL_ASSERT((shift >= 1) && (shift <= lane_size_in_bits));
+  NEONShiftImmediate(vd, vn, op, ((2 * lane_size_in_bits) - shift) << 16);
 }

@@ -4890,9 +4928,9 @@ void Assembler::NEONShiftImmediateL(const VRegister& vd,
                                     const VRegister& vn,
                                     int shift,
                                     NEONShiftImmediateOp op) {
-  int laneSizeInBits = vn.GetLaneSizeInBits();
-  VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits));
-  int immh_immb = (laneSizeInBits + shift) << 16;
+  int lane_size_in_bits = vn.GetLaneSizeInBits();
+  VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits));
+  int immh_immb = (lane_size_in_bits + shift) << 16;

   VIXL_ASSERT((vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
               (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
@@ -4908,9 +4946,9 @@ void Assembler::NEONShiftImmediateN(const VRegister& vd,
                                     int shift,
                                     NEONShiftImmediateOp op) {
   Instr q, scalar;
-  int laneSizeInBits = vd.GetLaneSizeInBits();
-  VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits));
-  int immh_immb = (2 * laneSizeInBits - shift) << 16;
+  int lane_size_in_bits = vd.GetLaneSizeInBits();
+  VIXL_ASSERT((shift >= 1) && (shift <= lane_size_in_bits));
+  int immh_immb = (2 * lane_size_in_bits - shift) << 16;

   if (vn.IsScalar()) {
     VIXL_ASSERT((vd.Is1B() && vn.Is1H()) ||
                 (vd.Is1H() && vn.Is1S()) ||
@@ -5271,6 +5309,7 @@ void Assembler::MoveWide(const Register& rd,
   } else {
     // Calculate a new immediate and shift combination to encode the immediate
     // argument.
+    VIXL_ASSERT(shift == -1);
     shift = 0;
     if ((imm & 0xffffffffffff0000) == 0) {
       // Nothing to do.
@@ -5604,7 +5643,7 @@ void Assembler::DataProcExtendedRegister(const Register& rd,

 Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,
-                                     unsigned access_size,
+                                     unsigned access_size_in_bytes_log2,
                                      LoadStoreScalingOption option) {
   Instr base = RnSP(addr.GetBaseRegister());
   int64_t offset = addr.GetOffset();
@@ -5614,21 +5653,22 @@ Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,
                             (option == PreferUnscaledOffset) ||
                             (option == RequireUnscaledOffset);
     if (prefer_unscaled && IsImmLSUnscaled(offset)) {
       // Use the unscaled addressing mode.
-      return base | LoadStoreUnscaledOffsetFixed |
-             ImmLS(static_cast<int>(offset));
+      return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset);
     }

     if ((option != RequireUnscaledOffset) &&
-        IsImmLSScaled(offset, access_size)) {
+        IsImmLSScaled(offset, access_size_in_bytes_log2)) {
+      // We need `offset` to be positive for the shift to be well-defined.
+      // IsImmLSScaled should check this.
+      VIXL_ASSERT(offset >= 0);
       // Use the scaled addressing mode.
       return base | LoadStoreUnsignedOffsetFixed |
-             ImmLSUnsigned(static_cast<int>(offset) >> access_size);
+             ImmLSUnsigned(offset >> access_size_in_bytes_log2);
     }

     if ((option != RequireScaledOffset) && IsImmLSUnscaled(offset)) {
       // Use the unscaled addressing mode.
-      return base | LoadStoreUnscaledOffsetFixed |
-             ImmLS(static_cast<int>(offset));
+      return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset);
     }
   }
@@ -5649,17 +5689,17 @@ Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,

   // Shifts are encoded in one bit, indicating a left shift by the memory
   // access size.
-  VIXL_ASSERT((shift_amount == 0) || (shift_amount == access_size));
+  VIXL_ASSERT((shift_amount == 0) ||
+              (shift_amount == access_size_in_bytes_log2));
   return base | LoadStoreRegisterOffsetFixed | Rm(addr.GetRegisterOffset()) |
          ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0);
 }

 if (addr.IsPreIndex() && IsImmLSUnscaled(offset)) {
-    return base | LoadStorePreIndexFixed | ImmLS(static_cast<int>(offset));
+    return base | LoadStorePreIndexFixed | ImmLS(offset);
   }

   if (addr.IsPostIndex() && IsImmLSUnscaled(offset)) {
-    return base | LoadStorePostIndexFixed | ImmLS(static_cast<int>(offset));
+    return base | LoadStorePostIndexFixed | ImmLS(offset);
   }

   // If this point is reached, the MemOperand (addr) cannot be encoded.
@@ -5694,7 +5734,7 @@ void Assembler::LoadStorePAC(const Register& xt,
 }

-void Assembler::Prefetch(PrefetchOperation op,
+void Assembler::Prefetch(int op,
                          const MemOperand& addr,
                          LoadStoreScalingOption option) {
   VIXL_ASSERT(addr.IsRegisterOffset() || addr.IsImmediateOffset());
@@ -5703,6 +5743,14 @@ void Assembler::Prefetch(PrefetchOperation op,
   Emit(PRFM | prfop | LoadStoreMemOperand(addr, kXRegSizeInBytesLog2, option));
 }

+void Assembler::Prefetch(PrefetchOperation op,
+                         const MemOperand& addr,
+                         LoadStoreScalingOption option) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  Prefetch(static_cast<int>(op), addr, option);
+}
+
 bool Assembler::IsImmAddSub(int64_t immediate) {
   return IsUint12(immediate) ||
@@ -5788,17 +5836,17 @@ bool Assembler::IsImmFP64(double imm) {
 }

-bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size) {
-  VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
-  return IsMultiple(offset, 1 << access_size) &&
-         IsInt7(offset / (1 << access_size));
+bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2) {
+  VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2);
+  return IsMultiple(offset, 1 << access_size_in_bytes_log2) &&
+         IsInt7(offset / (1 << access_size_in_bytes_log2));
 }

-bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size) {
-  VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
-  return IsMultiple(offset, 1 << access_size) &&
-         IsUint12(offset / (1 << access_size));
+bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2) {
+  VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2);
+  return IsMultiple(offset, 1 << access_size_in_bytes_log2) &&
+         IsUint12(offset / (1 << access_size_in_bytes_log2));
 }
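The new assert in LoadStoreMemOperand documents why the scaled branch may shift the offset: IsImmLSScaled only accepts non-negative multiples of the access size, so `offset >> access_size_in_bytes_log2` is never applied to a negative value. A simplified sketch of the two predicates' contract, written independently of VIXL's IsMultiple/IsUint12/IsInt9 helpers:

#include <cstdint>

// Scaled, unsigned 12-bit form: the offset must be a non-negative multiple
// of the access size, which makes the later shift well-defined.
bool IsScaledOffset(int64_t offset, unsigned access_size_in_bytes_log2) {
  int64_t unit = int64_t{1} << access_size_in_bytes_log2;
  if ((offset % unit) != 0) return false;
  int64_t scaled = offset / unit;
  return (scaled >= 0) && (scaled < (int64_t{1} << 12));
}

// Unscaled, signed 9-bit form: any byte offset in [-256, 255].
bool IsUnscaledOffset(int64_t offset) {
  return (offset >= -256) && (offset <= 255);
}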
@@ -5832,7 +5880,8 @@ bool Assembler::IsImmLogical(uint64_t value,
                              unsigned* n,
                              unsigned* imm_s,
                              unsigned* imm_r) {
-  VIXL_ASSERT((width == kWRegSize) || (width == kXRegSize));
+  VIXL_ASSERT((width == kBRegSize) || (width == kHRegSize) ||
+              (width == kSRegSize) || (width == kDRegSize));

   bool negate = false;
@@ -5873,16 +5922,18 @@ bool Assembler::IsImmLogical(uint64_t value,
     value = ~value;
   }

-  if (width == kWRegSize) {
-    // To handle 32-bit logical immediates, the very easiest thing is to repeat
-    // the input value twice to make a 64-bit word. The correct encoding of that
-    // as a logical immediate will also be the correct encoding of the 32-bit
-    // value.
+  if (width <= kWRegSize) {
+    // To handle 8/16/32-bit logical immediates, the very easiest thing is to repeat
+    // the input value to fill a 64-bit word. The correct encoding of that as a
+    // logical immediate will also be the correct encoding of the value.

-    // Avoid making the assumption that the most-significant 32 bits are zero by
+    // Avoid making the assumption that the most-significant 56/48/32 bits are zero by
     // shifting the value left and duplicating it.
-    value <<= kWRegSize;
-    value |= value >> kWRegSize;
+    for (unsigned bits = width; bits <= kWRegSize; bits *= 2) {
+      value <<= bits;
+      uint64_t mask = (UINT64_C(1) << bits) - 1;
+      value |= ((value >> bits) & mask);
+    }
   }

   // The basic analysis idea: imagine our input word looks like this.
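The loop above generalises the old 32-bit doubling: starting from the low `width` bits, it doubles the replicated pattern until the full doubleword is filled, masking each step so that stale high bits never leak in. The same idea pulled out of context; ReplicateToXReg is not a VIXL function, just a standalone sketch:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Replicate the low `width` bits of `value` across a 64-bit word so that an
// 8/16/32-bit logical immediate can be analysed with the 64-bit rules.
uint64_t ReplicateToXReg(uint64_t value, unsigned width) {
  assert((width == 8) || (width == 16) || (width == 32));
  for (unsigned bits = width; bits <= 32; bits *= 2) {
    value <<= bits;
    uint64_t mask = (UINT64_C(1) << bits) - 1;
    value |= ((value >> bits) & mask);
  }
  return value;
}

int main() {
  // The byte 0x3c fills the doubleword: prints 3c3c3c3c3c3c3c3c.
  std::printf("%016llx\n",
              (unsigned long long)ReplicateToXReg(0x3c, 8));
  return 0;
}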
-bool AreSameFormat(const VRegister& reg1, - const VRegister& reg2, - const VRegister& reg3, - const VRegister& reg4) { - VIXL_ASSERT(reg1.IsValid()); - bool match = true; - match &= !reg2.IsValid() || reg2.IsSameFormat(reg1); - match &= !reg3.IsValid() || reg3.IsSameFormat(reg1); - match &= !reg4.IsValid() || reg4.IsSameFormat(reg1); - return match; -} - - -bool AreConsecutive(const VRegister& reg1, - const VRegister& reg2, - const VRegister& reg3, - const VRegister& reg4) { - VIXL_ASSERT(reg1.IsValid()); - - if (!reg2.IsValid()) { - return true; - } else if (reg2.GetCode() != ((reg1.GetCode() + 1) % kNumberOfVRegisters)) { - return false; - } - - if (!reg3.IsValid()) { - return true; - } else if (reg3.GetCode() != ((reg2.GetCode() + 1) % kNumberOfVRegisters)) { - return false; - } - - if (!reg4.IsValid()) { - return true; - } else if (reg4.GetCode() != ((reg3.GetCode() + 1) % kNumberOfVRegisters)) { - return false; - } - - return true; -} } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h index 3ccda1a6..f7aafd07 100644 --- a/src/aarch64/assembler-aarch64.h +++ b/src/aarch64/assembler-aarch64.h @@ -1089,18 +1089,6 @@ class Assembler : public vixl::internal::AssemblerBase { // zero [Armv8.3]. void pacdza(const Register& xd); - // Pointer Authentication Code for Data address, using key A, with address in - // x17 and modifier in x16 [Armv8.3]. - void pacda1716(); - - // Pointer Authentication Code for Data address, using key A, with address in - // LR and modifier in SP [Armv8.3]. - void pacdasp(); - - // Pointer Authentication Code for Data address, using key A, with address in - // LR and a modifier of zero [Armv8.3]. - void pacdaz(); - // Pointer Authentication Code for Data address, using key B [Armv8.3]. void pacdb(const Register& xd, const Register& xn); @@ -1108,18 +1096,6 @@ class Assembler : public vixl::internal::AssemblerBase { // zero [Armv8.3]. void pacdzb(const Register& xd); - // Pointer Authentication Code for Data address, using key B, with address in - // x17 and modifier in x16 [Armv8.3]. - void pacdb1716(); - - // Pointer Authentication Code for Data address, using key B, with address in - // LR and modifier in SP [Armv8.3]. - void pacdbsp(); - - // Pointer Authentication Code for Data address, using key B, with address in - // LR and a modifier of zero [Armv8.3]. - void pacdbz(); - // Pointer Authentication Code, using Generic key [Armv8.3]. void pacga(const Register& xd, const Register& xn, const Register& xm); @@ -1167,36 +1143,12 @@ class Assembler : public vixl::internal::AssemblerBase { // Authenticate Data address, using key A and a modifier of zero [Armv8.3]. void autdza(const Register& xd); - // Authenticate Data address, using key A, with address in x17 and modifier in - // x16 [Armv8.3]. - void autda1716(); - - // Authenticate Data address, using key A, with address in LR and modifier in - // SP [Armv8.3]. - void autdasp(); - - // Authenticate Data address, using key A, with address in LR and a modifier - // of zero [Armv8.3]. - void autdaz(); - // Authenticate Data address, using key B [Armv8.3]. void autdb(const Register& xd, const Register& xn); // Authenticate Data address, using key B and a modifier of zero [Armv8.3]. void autdzb(const Register& xd); - // Authenticate Data address, using key B, with address in x17 and modifier in - // x16 [Armv8.3]. - void autdb1716(); - - // Authenticate Data address, using key B, with address in LR and modifier in - // SP [Armv8.3]. 
- void autdbsp(); - - // Authenticate Data address, using key B, with address in LR and a modifier - // of zero [Armv8.3]. - void autdbz(); - // Strip Pointer Authentication Code of Data address [Armv8.3]. void xpacd(const Register& xd); @@ -2112,6 +2064,22 @@ class Assembler : public vixl::internal::AssemblerBase { // Prefetch from pc + imm19 << 2. void prfm(PrefetchOperation op, int64_t imm19); + // Prefetch memory (allowing unallocated hints). + void prfm(int op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); + + // Prefetch memory (with unscaled offset, allowing unallocated hints). + void prfum(int op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Prefetch memory in the literal pool (allowing unallocated hints). + void prfm(int op, RawLiteral* literal); + + // Prefetch from pc + imm19 << 2 (allowing unallocated hints). + void prfm(int op, int64_t imm19); + // Move instructions. The default shift of -1 indicates that the move // instruction will calculate an appropriate 16-bit immediate and left shift // that is equal to the 64-bit immediate argument. If an explicit left shift @@ -3618,6 +3586,2240 @@ class Assembler : public vixl::internal::AssemblerBase { const VRegister& vm, int rot); + // Scalable Vector Extensions. + + // Absolute value (predicated). + void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Add vectors (predicated). + void add(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Add vectors (unpredicated). + void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Add immediate (unpredicated). + void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); + + // Add multiple of predicate register size to scalar register. + void addpl(const Register& xd, const Register& xn, int imm6); + + // Add multiple of vector register size to scalar register. + void addvl(const Register& xd, const Register& xn, int imm6); + + // Compute vector address. + void adr(const ZRegister& zd, const SVEMemOperand& addr); + + // Bitwise AND predicates. + void and_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise AND vectors (predicated). + void and_(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise AND with immediate (unpredicated). + void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise AND vectors (unpredicated). + void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise AND predicates. + void ands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise AND reduction to scalar. + void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Arithmetic shift right by immediate (predicated). + void asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Arithmetic shift right by 64-bit wide elements (predicated). + void asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Arithmetic shift right by immediate (unpredicated). + void asr(const ZRegister& zd, const ZRegister& zn, int shift); + + // Arithmetic shift right by 64-bit wide elements (unpredicated). 
+ void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Arithmetic shift right for divide by immediate (predicated). + void asrd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Reversed arithmetic shift right by vector (predicated). + void asrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise clear predicates. + void bic(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise clear vectors (predicated). + void bic(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise clear bits using immediate (unpredicated). + void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise clear vectors (unpredicated). + void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise clear predicates. + void bics(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break after first true condition. + void brka(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Break after first true condition. + void brkas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Break before first true condition. + void brkb(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Break before first true condition. + void brkbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Propagate break to next partition. + void brkn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Propagate break to next partition. + void brkns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break after first true condition, propagating from previous partition. + void brkpa(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break after first true condition, propagating from previous partition. + void brkpas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break before first true condition, propagating from previous partition. + void brkpb(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break before first true condition, propagating from previous partition. + void brkpbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Conditionally extract element after last to general-purpose register. + void clasta(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm); + + // Conditionally extract element after last to SIMD&FP scalar register. + void clasta(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm); + + // Conditionally extract element after last to vector register. 
+ void clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Conditionally extract last element to general-purpose register. + void clastb(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm); + + // Conditionally extract last element to SIMD&FP scalar register. + void clastb(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm); + + // Conditionally extract last element to vector register. + void clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Count leading sign bits (predicated). + void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Count leading zero bits (predicated). + void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + void cmp(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to 64-bit wide elements. + void cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. + void cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. + void cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. + void cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. 
+ void cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Logically invert boolean condition in vector (predicated). + void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Count non-zero bits (predicated). + void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Set scalar to multiple of predicate constraint element count. + void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Set scalar to multiple of predicate constraint element count. + void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Set scalar to multiple of predicate constraint element count. + void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Set scalar to active predicate element count. + void cntp(const Register& xd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Set scalar to multiple of predicate constraint element count. + void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Shuffle active elements of vector to the right and fill with zero. + void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn); + + // Copy signed integer immediate to vector elements (predicated). + void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1); + + // Copy general-purpose register to vector elements (predicated). + void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn); + + // Copy SIMD&FP scalar register to vector elements (predicated). + void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn); + + // Compare and terminate loop. + void ctermeq(const Register& rn, const Register& rm); + + // Compare and terminate loop. + void ctermne(const Register& rn, const Register& rm); + + // Decrement scalar by multiple of predicate constraint element count. + void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement scalar by multiple of predicate constraint element count. + void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement vector by multiple of predicate constraint element count. + void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement scalar by multiple of predicate constraint element count. + void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement vector by multiple of predicate constraint element count. + void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement scalar by active predicate element count. + void decp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Decrement vector by active predicate element count. + void decp(const ZRegister& zdn, const PRegister& pg); + + // Decrement scalar by multiple of predicate constraint element count. + void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement vector by multiple of predicate constraint element count. 
+ void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Broadcast general-purpose register to vector elements (unpredicated). + void dup(const ZRegister& zd, const Register& xn); + + // Broadcast indexed element to vector (unpredicated). + void dup(const ZRegister& zd, const ZRegister& zn, unsigned index); + + // As for movz/movk/movn, if the default shift of -1 is specified to dup, the + // assembler will pick an appropriate immediate and left shift that is + // equivalent to the immediate argument. If an explicit left shift is + // specified (0 or 8), the immediate must be a signed 8-bit integer. + + // Broadcast signed immediate to vector elements (unpredicated). + void dup(const ZRegister& zd, int imm8, int shift = -1); + + // Broadcast logical bitmask immediate to vector (unpredicated). + void dupm(const ZRegister& zd, uint64_t imm); + + // Bitwise exclusive OR with inverted immediate (unpredicated). + void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise exclusive OR predicates. + void eor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise exclusive OR vectors (predicated). + void eor(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise exclusive OR with immediate (unpredicated). + void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise exclusive OR vectors (unpredicated). + void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise exclusive OR predicates. + void eors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise XOR reduction to scalar. + void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Extract vector from pair of vectors. + void ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset); + + // Floating-point absolute difference (predicated). + void fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point absolute value (predicated). + void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point absolute compare vectors. + void facge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point absolute compare vectors. + void facgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point add immediate (predicated). + void fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point add vector (predicated). + void fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point add vector (unpredicated). + void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point add strictly-ordered reduction, accumulating in scalar. + void fadda(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm); + + // Floating-point add recursive reduction to scalar. + void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point complex add with rotate (predicated). 
+ void fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Floating-point compare vector with zero. + void fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point compare vector with zero. + void fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point compare vector with zero. + void fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point complex multiply-add with rotate (predicated). + void fcmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Floating-point complex multiply-add by indexed values with rotate. + void fcmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + + // Floating-point compare vector with zero. + void fcmle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vector with zero. + void fcmlt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vector with zero. + void fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point compare vectors. + void fcmuo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Copy floating-point immediate to vector elements (predicated). + void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm); + + // Copy half-precision floating-point immediate to vector elements + // (predicated). + void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) { + fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN)); + } + + // Floating-point convert precision (predicated). + void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point convert to signed integer, rounding toward zero + // (predicated). + void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point convert to unsigned integer, rounding toward zero + // (predicated). + void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point divide by vector (predicated). + void fdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point reversed divide by vector (predicated). + void fdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Broadcast floating-point immediate to vector elements. + void fdup(const ZRegister& zd, double imm); + + // Broadcast half-precision floating-point immediate to vector elements. 
+ void fdup(const ZRegister& zd, Float16 imm) { + fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN)); + } + + // Floating-point exponential accelerator. + void fexpa(const ZRegister& zd, const ZRegister& zn); + + // Floating-point fused multiply-add vectors (predicated), writing + // multiplicand [Zdn = Za + Zdn * Zm]. + void fmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point maximum with immediate (predicated). + void fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point maximum (predicated). + void fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point maximum number with immediate (predicated). + void fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point maximum number (predicated). + void fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point maximum number recursive reduction to scalar. + void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point maximum recursive reduction to scalar. + void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point minimum with immediate (predicated). + void fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point minimum (predicated). + void fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point minimum number with immediate (predicated). + void fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point minimum number (predicated). + void fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point minimum number recursive reduction to scalar. + void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point minimum recursive reduction to scalar. + void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point fused multiply-add vectors (predicated), writing addend + // [Zda = Zda + Zn * Zm]. + void fmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point fused multiply-add by indexed elements + // (Zda = Zda + Zn * Zm[indexed]). + void fmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Floating-point fused multiply-subtract vectors (predicated), writing + // addend [Zda = Zda + -Zn * Zm]. + void fmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point fused multiply-subtract by indexed elements + // (Zda = Zda + -Zn * Zm[indexed]). + void fmls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Move 8-bit floating-point immediate to vector elements (unpredicated). + void fmov(const ZRegister& zd, double imm); + + // Move 8-bit floating-point immediate to vector elements (predicated). + void fmov(const ZRegister& zd, const PRegisterM& pg, double imm); + + // Floating-point fused multiply-subtract vectors (predicated), writing + // multiplicand [Zdn = Za + -Zdn * Zm]. 
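The bracketed equations distinguish which register each fused form overwrites; the same multiply-add can be spelt either way, as in this sketch (illustrative; `assm` and register aliases assumed):

    // Addend form: z0 = z0 + z1 * z2 for active lanes.
    assm.fmla(z0.VnD(), p0.Merging(), z1.VnD(), z2.VnD());
    // Multiplicand form: z1 = z0 + z1 * z2; note the (zm, za) operand order.
    assm.fmad(z1.VnD(), p0.Merging(), z2.VnD(), z0.VnD());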
+ void fmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point multiply by immediate (predicated). + void fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point multiply vectors (predicated). + void fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point multiply by indexed elements. + void fmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned index); + + // Floating-point multiply vectors (unpredicated). + void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point multiply-extended vectors (predicated). + void fmulx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point negate (predicated). + void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point negated fused multiply-add vectors (predicated), writing + // multiplicand [Zdn = -Za + -Zdn * Zm]. + void fnmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point negated fused multiply-add vectors (predicated), writing + // addend [Zda = -Zda + -Zn * Zm]. + void fnmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point negated fused multiply-subtract vectors (predicated), + // writing addend [Zda = -Zda + Zn * Zm]. + void fnmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point negated fused multiply-subtract vectors (predicated), + // writing multiplicand [Zdn = -Za + Zdn * Zm]. + void fnmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point reciprocal estimate (unpredicated). + void frecpe(const ZRegister& zd, const ZRegister& zn); + + // Floating-point reciprocal step (unpredicated). + void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point reciprocal exponent (predicated). + void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, to nearest with ties away from + // zero (predicated). + void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, using the current rounding mode + // (predicated). + void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, toward minus infinity + // (predicated). + void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, to nearest with ties to even + // (predicated). + void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, toward plus infinity + // (predicated). + void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, using the current rounding mode + // and signalling inexactness (predicated). + void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, toward zero (predicated). + void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point reciprocal square root estimate (unpredicated). + void frsqrte(const ZRegister& zd, const ZRegister& zn); + + // Floating-point reciprocal square root step (unpredicated). 
+ void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point adjust exponent by vector (predicated). + void fscale(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point square root (predicated). + void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point subtract immediate (predicated). + void fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point subtract vectors (predicated). + void fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point subtract vectors (unpredicated). + void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point reversed subtract from immediate (predicated). + void fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point reversed subtract vectors (predicated). + void fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point trigonometric multiply-add coefficient. + void ftmad(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm3); + + // Floating-point trigonometric starting value. + void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point trigonometric select coefficient. + void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Increment scalar by multiple of predicate constraint element count. + void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment scalar by multiple of predicate constraint element count. + void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment vector by multiple of predicate constraint element count. + void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment scalar by multiple of predicate constraint element count. + void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment vector by multiple of predicate constraint element count. + void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment scalar by active predicate element count. + void incp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Increment vector by active predicate element count. + void incp(const ZRegister& zdn, const PRegister& pg); + + // Increment scalar by multiple of predicate constraint element count. + void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment vector by multiple of predicate constraint element count. + void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Create index starting from and incremented by immediate. + void index(const ZRegister& zd, int start, int step); + + // Create index starting from and incremented by general-purpose register. + void index(const ZRegister& zd, const Register& rn, const Register& rm); + + // Create index starting from general-purpose register and incremented by + // immediate. + void index(const ZRegister& zd, const Register& rn, int imm5); + + // Create index starting from immediate and incremented by general-purpose + // register. + void index(const ZRegister& zd, int imm5, const Register& rm); + + // Insert general-purpose register in shifted vector. 
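A sketch combining index with the insr form declared below, a common way to build small test vectors (illustrative; `assm` assumed):

    assm.index(z0.VnS(), 0, 1);   // z0.s = {0, 1, 2, ...}.
    assm.insr(z0.VnS(), w1);      // Shift z0 up one lane; w1 enters lane 0.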
+ void insr(const ZRegister& zdn, const Register& rm); + + // Insert SIMD&FP scalar register in shifted vector. + void insr(const ZRegister& zdn, const VRegister& vm); + + // Extract element after last to general-purpose register. + void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn); + + // Extract element after last to SIMD&FP scalar register. + void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Extract last element to general-purpose register. + void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn); + + // Extract last element to SIMD&FP scalar register. + void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Contiguous/gather load bytes to vector. + void ld1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load halfwords to vector. + void ld1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load words to vector. + void ld1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load doublewords to vector. + void ld1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // TODO: Merge other loads into the SVEMemOperand versions. + + // Load and broadcast unsigned byte to vector. + void ld1rb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast unsigned halfword to vector. + void ld1rh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast unsigned word to vector. + void ld1rw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast doubleword to vector. + void ld1rd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate sixteen bytes. + void ld1rqb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate eight halfwords. + void ld1rqh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate four words. + void ld1rqw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate two doublewords. + void ld1rqd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast signed byte to vector. + void ld1rsb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast signed halfword to vector. + void ld1rsh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast signed word to vector. + void ld1rsw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load signed bytes to vector. + void ld1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load signed halfwords to vector. + void ld1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load signed words to vector. + void ld1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // TODO: Merge other loads into the SVEMemOperand versions. + + // Contiguous load two-byte structures to two vectors. 
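A de-interleaving sketch for the structure loads declared below; the immediate offset in SVEMemOperand is in vector-length units and, for ldN forms, must be a multiple of N (illustrative; `assm` and buffer setup assumed):

    assm.ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(x0));
    assm.ld2b(z2.VnB(), z3.VnB(), p0.Zeroing(), SVEMemOperand(x0, 2, SVE_MUL_VL));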
+ void ld2b(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load two-halfword structures to two vectors. + void ld2h(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load two-word structures to two vectors. + void ld2w(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load two-doubleword structures to two vectors. + void ld2d(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-byte structures to three vectors. + void ld3b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-halfword structures to three vectors. + void ld3h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-word structures to three vectors. + void ld3w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-doubleword structures to three vectors. + void ld3d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-byte structures to four vectors. + void ld4b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-halfword structures to four vectors. + void ld4h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-word structures to four vectors. + void ld4w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-doubleword structures to four vectors. + void ld4d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault unsigned bytes to vector. + void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault unsigned halfwords to vector. + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault unsigned words to vector. + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault doublewords to vector. + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault signed bytes to vector. + void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault signed halfwords to vector. + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault signed words to vector. + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Gather load first-fault unsigned bytes to vector. 
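The first-fault loads pair with the FFR instructions declared further down (setffr, rdffr); a speculative-load step might look like this sketch (illustrative; `assm`, the register aliases, and the scalar-plus-scalar SVEMemOperand form are assumptions here):

    assm.setffr();                                               // Mark every element good.
    assm.ldff1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1));  // May stop short of a fault.
    assm.rdffr(p1.VnB(), p0.Zeroing());                          // Which elements loaded?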
+ void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault unsigned bytes to vector (immediate index). + void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault doublewords to vector (vector index). + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault doublewords to vector (immediate index). + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault unsigned halfwords to vector (vector index). + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault unsigned halfwords to vector (immediate index). + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault signed bytes to vector (vector index). + void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault signed bytes to vector (immediate index). + void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault signed halfwords to vector (vector index). + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault signed halfwords to vector (immediate index). + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault signed words to vector (vector index). + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault signed words to vector (immediate index). + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault unsigned words to vector (vector index). + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault unsigned words to vector (immediate index). + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Contiguous load non-fault unsigned bytes to vector (immediate index). + void ldnf1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault doublewords to vector (immediate index). + void ldnf1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault unsigned halfwords to vector (immediate + // index). + void ldnf1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault signed bytes to vector (immediate index). + void ldnf1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault signed halfwords to vector (immediate index). + void ldnf1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault signed words to vector (immediate index). + void ldnf1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault unsigned words to vector (immediate index). 
+ void ldnf1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal bytes to vector. + void ldnt1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal halfwords to vector. + void ldnt1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal words to vector. + void ldnt1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal doublewords to vector. + void ldnt1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load SVE predicate/vector register. + void ldr(const CPURegister& rt, const SVEMemOperand& addr); + + // Logical shift left by immediate (predicated). + void lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Logical shift left by 64-bit wide elements (predicated). + void lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Logical shift left by immediate (unpredicated). + void lsl(const ZRegister& zd, const ZRegister& zn, int shift); + + // Logical shift left by 64-bit wide elements (unpredicated). + void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Reversed logical shift left by vector (predicated). + void lslr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Logical shift right by immediate (predicated). + void lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Logical shift right by 64-bit wide elements (predicated). + void lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Logical shift right by immediate (unpredicated). + void lsr(const ZRegister& zd, const ZRegister& zn, int shift); + + // Logical shift right by 64-bit wide elements (unpredicated). + void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Reversed logical shift right by vector (predicated). + void lsrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise invert predicate. + void not_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Bitwise invert predicate, setting the condition flags. + void nots(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Multiply-add vectors (predicated), writing multiplicand + // [Zdn = Za + Zdn * Zm]. + void mad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Multiply-add vectors (predicated), writing addend + // [Zda = Zda + Zn * Zm]. + void mla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Multiply-subtract vectors (predicated), writing addend + // [Zda = Zda - Zn * Zm]. 
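As with the floating-point forms, the integer fused operations differ only in which register they overwrite, as in this sketch (illustrative; `assm` assumed):

    assm.mla(z0.VnH(), p0.Merging(), z1.VnH(), z2.VnH());  // z0 = z0 + z1 * z2.
    assm.mls(z0.VnH(), p0.Merging(), z1.VnH(), z2.VnH());  // z0 = z0 - z1 * z2.
    assm.mad(z1.VnH(), p0.Merging(), z2.VnH(), z0.VnH());  // z1 = z0 + z1 * z2.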
+ void mls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Move predicates (unpredicated). + void mov(const PRegister& pd, const PRegister& pn); + + // Move predicates (merging). + void mov(const PRegisterWithLaneSize& pd, + const PRegisterM& pg, + const PRegisterWithLaneSize& pn); + + // Move predicates (zeroing). + void mov(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Move general-purpose register to vector elements (unpredicated). + void mov(const ZRegister& zd, const Register& xn); + + // Move SIMD&FP scalar register to vector elements (unpredicated). + void mov(const ZRegister& zd, const VRegister& vn); + + // Move vector register (unpredicated). + void mov(const ZRegister& zd, const ZRegister& zn); + + // Move indexed element to vector elements (unpredicated). + void mov(const ZRegister& zd, const ZRegister& zn, unsigned index); + + // Move general-purpose register to vector elements (predicated). + void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn); + + // Move SIMD&FP scalar register to vector elements (predicated). + void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn); + + // Move vector elements (predicated). + void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Move signed integer immediate to vector elements (predicated). + void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1); + + // Move signed immediate to vector elements (unpredicated). + void mov(const ZRegister& zd, int imm8, int shift); + + // Move logical bitmask immediate to vector (unpredicated). + void mov(const ZRegister& zd, uint64_t imm); + + // Move predicates (unpredicated), setting the condition flags. + void movs(const PRegister& pd, const PRegister& pn); + + // Move predicates (zeroing), setting the condition flags. + void movs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Move prefix (predicated). + void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn); + + // Move prefix (unpredicated). + void movprfx(const ZRegister& zd, const ZRegister& zn); + + // Multiply-subtract vectors (predicated), writing multiplicand + // [Zdn = Za - Zdn * Zm]. + void msb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Multiply vectors (predicated). + void mul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Multiply by immediate (unpredicated). + void mul(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Bitwise NAND predicates. + void nand(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise NAND predicates, setting the condition flags. + void nands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Negate (predicated). + void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Bitwise NOR predicates. + void nor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise NOR predicates, setting the condition flags. + void nors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise invert vector (predicated). 
+ void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Bitwise OR inverted predicate. + void orn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR inverted predicate, setting the condition flags. + void orns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR with inverted immediate (unpredicated). + void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise OR predicate. + void orr(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR vectors (predicated). + void orr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise OR with immediate (unpredicated). + void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise OR vectors (unpredicated). + void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise OR predicate, setting the condition flags. + void orrs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR reduction to scalar. + void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Set all predicate elements to false. + void pfalse(const PRegisterWithLaneSize& pd); + + // Set the first active predicate element to true. + void pfirst(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Find next active predicate. + void pnext(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Prefetch bytes. + void prfb(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Prefetch halfwords. + void prfh(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Prefetch words. + void prfw(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Prefetch doublewords. + void prfd(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Set condition flags for predicate. + void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn); + + // Initialise predicate from named constraint. + void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL); + + // Initialise predicate from named constraint, setting the condition flags. + void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL); + + // Unpack and widen half of predicate. + void punpkhi(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn); + + // Unpack and widen half of predicate. + void punpklo(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn); + + // Reverse bits (predicated). + void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Read the first-fault register. + void rdffr(const PRegisterWithLaneSize& pd); + + // Return predicate of successfully loaded elements. + void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg); + + // Return predicate of successfully loaded elements, setting the condition + // flags. + void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg); + + // Read multiple of vector register size to scalar register. + void rdvl(const Register& xd, int imm6); + + // Reverse all elements in a predicate. 
+ void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn); + + // Reverse all elements in a vector (unpredicated). + void rev(const ZRegister& zd, const ZRegister& zn); + + // Reverse bytes within each element (predicated). + void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Reverse halfwords within each element (predicated). + void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Reverse words within each doubleword element (predicated). + void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed absolute difference (predicated). + void sabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed add reduction to scalar. + void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn); + + // Signed integer convert to floating-point (predicated). + void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed divide (predicated). + void sdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed reversed divide (predicated). + void sdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed dot product by indexed quadtuplet. + void sdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed dot product. + void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Conditionally select elements from two predicates. + void sel(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Conditionally select elements from two vectors. + void sel(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Initialise the first-fault register to all true. + void setffr(); + + // Signed maximum vectors (predicated). + void smax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed maximum with immediate (unpredicated). + void smax(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Signed maximum reduction to scalar. + void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Signed minimum vectors (predicated). + void smin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed minimum with immediate (unpredicated). + void smin(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Signed minimum reduction to scalar. + void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Signed multiply returning high half (predicated). + void smulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Splice two vectors under predicate control. + void splice(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating add vectors (unpredicated). + void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating add immediate (unpredicated). + void sqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Signed saturating decrement scalar by multiple of 8-bit predicate + // constraint element count. 
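The saturating counters clamp rather than wrap; a sketch of the scalar forms declared below (illustrative; `assm` assumed, patterns taken from the SVE_* constraint constants):

    assm.sqdecb(x0);                   // x0 -= B-lane count, saturating on overflow.
    assm.sqdecb(x1, w1, SVE_MUL3, 2);  // 32-bit form: saturate w1, sign-extend into x1.
    assm.sqdecw(x2, SVE_POW2);         // Count limited to a power-of-two lane count.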
+ void sqdecb(const Register& xd, + const Register& wn, + int pattern, + int multiplier); + + // Signed saturating decrement scalar by multiple of 8-bit predicate + // constraint element count. + void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 64-bit predicate + // constraint element count. + void sqdecd(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 64-bit predicate + // constraint element count. + void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement vector by multiple of 64-bit predicate + // constraint element count. + void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 16-bit predicate + // constraint element count. + void sqdech(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 16-bit predicate + // constraint element count. + void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement vector by multiple of 16-bit predicate + // constraint element count. + void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement scalar by active predicate element count. + void sqdecp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn); + + // Signed saturating decrement scalar by active predicate element count. + void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg); + + // Signed saturating decrement vector by active predicate element count. + void sqdecp(const ZRegister& zdn, const PRegister& pg); + + // Signed saturating decrement scalar by multiple of 32-bit predicate + // constraint element count. + void sqdecw(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 32-bit predicate + // constraint element count. + void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement vector by multiple of 32-bit predicate + // constraint element count. + void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by multiple of 8-bit predicate + // constraint element count. + void sqincb(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating increment scalar by multiple of 8-bit predicate + // constraint element count. + void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by multiple of 64-bit predicate + // constraint element count. + void sqincd(const Register& xd, + const Register& wn, + int pattern, + int multiplier); + + // Signed saturating increment scalar by multiple of 64-bit predicate + // constraint element count. + void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment vector by multiple of 64-bit predicate + // constraint element count. + void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by multiple of 16-bit predicate + // constraint element count. 
+ void sqinch(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating increment scalar by multiple of 16-bit predicate + // constraint element count. + void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment vector by multiple of 16-bit predicate + // constraint element count. + void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by active predicate element count. + void sqincp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn); + + // Signed saturating increment scalar by active predicate element count. + void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg); + + // Signed saturating increment vector by active predicate element count. + void sqincp(const ZRegister& zdn, const PRegister& pg); + + // Signed saturating increment scalar by multiple of 32-bit predicate + // constraint element count. + void sqincw(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating increment scalar by multiple of 32-bit predicate + // constraint element count. + void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment vector by multiple of 32-bit predicate + // constraint element count. + void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating subtract vectors (unpredicated). + void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating subtract immediate (unpredicated). + void sqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Contiguous/scatter store bytes from vector. + void st1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous/scatter store halfwords from vector. + void st1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous/scatter store words from vector. + void st1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous/scatter store doublewords from vector. + void st1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-byte structures from two vectors. + void st2b(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-halfword structures from two vectors. + void st2h(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-word structures from two vectors. + void st2w(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-doubleword structures from two vectors. + void st2d(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-byte structures from three vectors. + void st3b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-halfword structures from three vectors. + void st3h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-word structures from three vectors. 
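A sketch for the three-register structure store declared below; as with the loads, the immediate offset is in vector-length units and must be a multiple of the register count (illustrative; `assm` assumed):

    assm.st3w(z0.VnS(), z1.VnS(), z2.VnS(), p0, SVEMemOperand(x0, 3, SVE_MUL_VL));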
+ void st3w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-doubleword structures from three vectors. + void st3d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-byte structures from four vectors. + void st4b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-halfword structures from four vectors. + void st4h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-word structures from four vectors. + void st4w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-doubleword structures from four vectors. + void st4d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store non-temporal bytes from vector. + void stnt1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store non-temporal halfwords from vector. + void stnt1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store non-temporal words from vector. + void stnt1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store non-temporal doublewords from vector. + void stnt1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Store SVE predicate/vector register. + void str(const CPURegister& rt, const SVEMemOperand& addr); + + // Subtract vectors (predicated). + void sub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Subtract vectors (unpredicated). + void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Subtract immediate (unpredicated). + void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); + + // Reversed subtract vectors (predicated). + void subr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Reversed subtract from immediate (unpredicated). + void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); + + // Signed unpack and extend half of vector. + void sunpkhi(const ZRegister& zd, const ZRegister& zn); + + // Signed unpack and extend half of vector. + void sunpklo(const ZRegister& zd, const ZRegister& zn); + + // Signed byte extend (predicated). + void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed halfword extend (predicated). + void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed word extend (predicated). + void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Programmable table lookup/permute using vector of indices into a + // vector. + void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Interleave even or odd elements from two predicates. 
+ void trn1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave even or odd elements from two vectors. + void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Interleave even or odd elements from two predicates. + void trn2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave even or odd elements from two vectors. + void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference (predicated). + void uabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned add reduction to scalar. + void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn); + + // Unsigned integer convert to floating-point (predicated). + void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned divide (predicated). + void udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned reversed divide (predicated). + void udivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned dot product by indexed quadtuplet. + void udot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned dot product. + void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned maximum vectors (predicated). + void umax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned maximum with immediate (unpredicated). + void umax(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Unsigned maximum reduction to scalar. + void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Unsigned minimum vectors (predicated). + void umin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned minimum with immediate (unpredicated). + void umin(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Unsigned minimum reduction to scalar. + void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Unsigned multiply returning high half (predicated). + void umulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating add vectors (unpredicated). + void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned saturating add immediate (unpredicated). + void uqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Unsigned saturating decrement scalar by multiple of 8-bit predicate + // constraint element count. + void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement scalar by multiple of 64-bit predicate + // constraint element count. + void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement vector by multiple of 64-bit predicate + // constraint element count. + void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement scalar by multiple of 16-bit predicate + // constraint element count. 
+ void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement vector by multiple of 16-bit predicate + // constraint element count. + void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement scalar by active predicate element count. + void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Unsigned saturating decrement vector by active predicate element count. + void uqdecp(const ZRegister& zdn, const PRegister& pg); + + // Unsigned saturating decrement scalar by multiple of 32-bit predicate + // constraint element count. + void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement vector by multiple of 32-bit predicate + // constraint element count. + void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by multiple of 8-bit predicate + // constraint element count. + void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by multiple of 64-bit predicate + // constraint element count. + void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment vector by multiple of 64-bit predicate + // constraint element count. + void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by multiple of 16-bit predicate + // constraint element count. + void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment vector by multiple of 16-bit predicate + // constraint element count. + void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by active predicate element count. + void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Unsigned saturating increment vector by active predicate element count. + void uqincp(const ZRegister& zdn, const PRegister& pg); + + // Unsigned saturating increment scalar by multiple of 32-bit predicate + // constraint element count. + void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment vector by multiple of 32-bit predicate + // constraint element count. + void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating subtract vectors (unpredicated). + void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned saturating subtract immediate (unpredicated). + void uqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Unsigned unpack and extend half of vector. + void uunpkhi(const ZRegister& zd, const ZRegister& zn); + + // Unsigned unpack and extend half of vector. + void uunpklo(const ZRegister& zd, const ZRegister& zn); + + // Unsigned byte extend (predicated). + void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned halfword extend (predicated). + void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned word extend (predicated). + void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Concatenate even or odd elements from two predicates. 
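A permute sketch for the concatenate/interleave pairs declared below (illustrative; `assm` assumed): uzp1/uzp2 pick the even/odd-numbered elements of the zn:zm pair, while zip1/zip2 interleave its low/high halves.

    assm.uzp1(z0.VnS(), z1.VnS(), z2.VnS());  // Even-numbered elements of z1:z2.
    assm.uzp2(z3.VnS(), z1.VnS(), z2.VnS());  // Odd-numbered elements of z1:z2.
    assm.zip1(z4.VnS(), z1.VnS(), z2.VnS());  // Interleave the low halves.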
+ void uzp1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Concatenate even or odd elements from two vectors. + void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Concatenate even or odd elements from two predicates. + void uzp2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Concatenate even or odd elements from two vectors. + void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // While incrementing signed scalar less than or equal to scalar. + void whilele(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While incrementing unsigned scalar lower than scalar. + void whilelo(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While incrementing unsigned scalar lower or same as scalar. + void whilels(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While incrementing signed scalar less than scalar. + void whilelt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // Write the first-fault register. + void wrffr(const PRegisterWithLaneSize& pn); + + // Interleave elements from two half predicates. + void zip1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave elements from two half vectors. + void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Interleave elements from two half predicates. + void zip2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave elements from two half vectors. + void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + // Emit generic instructions. // Emit raw instructions into the instruction stream. @@ -3650,20 +5852,20 @@ class Assembler : public vixl::internal::AssemblerBase { // Code generation helpers. // Register encoding. 
- static Instr Rd(CPURegister rd) { - VIXL_ASSERT(rd.GetCode() != kSPRegInternalCode); - return rd.GetCode() << Rd_offset; + template <int hibit, int lobit> + static Instr Rx(CPURegister rx) { + VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode); + return ImmUnsignedField<hibit, lobit>(rx.GetCode()); } - static Instr Rn(CPURegister rn) { - VIXL_ASSERT(rn.GetCode() != kSPRegInternalCode); - return rn.GetCode() << Rn_offset; - } - - static Instr Rm(CPURegister rm) { - VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode); - return rm.GetCode() << Rm_offset; +#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s) +#define REGISTER_ENCODER(N) \ + static Instr R##N(CPURegister r##N) { \ + return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \ } + CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER) +#undef REGISTER_ENCODER +#undef CPU_REGISTER_FIELD_NAMES static Instr RmNot31(CPURegister rm) { VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode); @@ -3671,26 +5873,6 @@ class Assembler : public vixl::internal::AssemblerBase { return Rm(rm); } - static Instr Ra(CPURegister ra) { - VIXL_ASSERT(ra.GetCode() != kSPRegInternalCode); - return ra.GetCode() << Ra_offset; - } - - static Instr Rt(CPURegister rt) { - VIXL_ASSERT(rt.GetCode() != kSPRegInternalCode); - return rt.GetCode() << Rt_offset; - } - - static Instr Rt2(CPURegister rt2) { - VIXL_ASSERT(rt2.GetCode() != kSPRegInternalCode); - return rt2.GetCode() << Rt2_offset; - } - - static Instr Rs(CPURegister rs) { - VIXL_ASSERT(rs.GetCode() != kSPRegInternalCode); - return rs.GetCode() << Rs_offset; - } - // These encoding functions allow the stack pointer to be encoded, and // disallow the zero register. static Instr RdSP(Register rd) { @@ -3708,6 +5890,33 @@ class Assembler : public vixl::internal::AssemblerBase { return (rm.GetCode() & kRegCodeMask) << Rm_offset; } + static Instr Pd(PRegister pd) { + return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd); + } + + static Instr Pm(PRegister pm) { + return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm); + } + + static Instr Pn(PRegister pn) { + return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn); + } + + static Instr PgLow8(PRegister pg) { + // Governing predicates can be merging, zeroing, or unqualified. They should + // never have a lane size. + VIXL_ASSERT(!pg.HasLaneSize()); + return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg); + } + + template <int hibit, int lobit> + static Instr Pg(PRegister pg) { + // Governing predicates can be merging, zeroing, or unqualified. They should + // never have a lane size. + VIXL_ASSERT(!pg.HasLaneSize()); + return Rx<hibit, lobit>(pg); + } + // Flags encoding. + static Instr Flags(FlagsUpdate S) { + if (S == SetFlags) { @@ -3721,6 +5930,26 @@ class Assembler : public vixl::internal::AssemblerBase { static Instr Cond(Condition cond) { return cond << Condition_offset; } + // Generic immediate encoding. + template <int hibit, int lobit> + static Instr ImmField(int64_t imm) { + VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0)); + VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte)); + int fieldsize = hibit - lobit + 1; + VIXL_ASSERT(IsIntN(fieldsize, imm)); + return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit); + } + + // For unsigned immediate encoding. + // TODO: Handle signed and unsigned immediate in a satisfactory way. 
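For reference, each REGISTER_ENCODER instantiation above expands to a thin wrapper over Rx, which in turn range-checks the register code through the ImmUnsignedField helper that follows; the expansion for the d field is mechanical:

    static Instr Rd(CPURegister rd) {
      return Rx<Rd_offset + Rd_width - 1, Rd_offset>(rd);
    }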
+ template <int hibit, int lobit> + static Instr ImmUnsignedField(uint64_t imm) { + VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0)); + VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte)); + VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm)); + return static_cast<Instr>(imm << lobit); + } + // PC-relative address encoding. static Instr ImmPCRelAddress(int64_t imm21) { VIXL_ASSERT(IsInt21(imm21)); @@ -3771,11 +6000,60 @@ class Assembler : public vixl::internal::AssemblerBase { if (IsUint12(imm)) { // No shift required. imm <<= ImmAddSub_offset; } else { - imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset); + imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset); } return imm; } + static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) { + VIXL_ASSERT(IsUint6(imms)); + VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3)); + USE(lane_size); + return imms << SVEImmSetBits_offset; + } + + static Instr SVEImmRotate(unsigned immr, unsigned lane_size) { + VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr)); + USE(lane_size); + return immr << SVEImmRotate_offset; + } + + static Instr SVEBitN(unsigned bitn) { + VIXL_ASSERT(IsUint1(bitn)); + return bitn << SVEBitN_offset; + } + + static Instr SVEDtype(unsigned msize_in_bytes_log2, + unsigned esize_in_bytes_log2, + bool is_signed, + int dtype_h_lsb = 23, + int dtype_l_lsb = 21) { + VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2); + VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2); + Instr dtype_h = msize_in_bytes_log2; + Instr dtype_l = esize_in_bytes_log2; + // Signed forms use the encodings where msize would be greater than esize. + if (is_signed) { + dtype_h = dtype_h ^ 0x3; + dtype_l = dtype_l ^ 0x3; + } + VIXL_ASSERT(IsUint2(dtype_h)); + VIXL_ASSERT(IsUint2(dtype_l)); + VIXL_ASSERT((dtype_h > dtype_l) == is_signed); + + return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb); + } + + static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2, + unsigned esize_in_bytes_log2, + bool is_signed) { + return SVEDtype(msize_in_bytes_log2, + esize_in_bytes_log2, + is_signed, + 23, + 13); + } + static Instr ImmS(unsigned imms, unsigned reg_size) { VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) || ((reg_size == kWRegSize) && IsUint5(imms))); @@ -3856,9 +6134,9 @@ class Assembler : public vixl::internal::AssemblerBase { return TruncateToUint9(imm9) << ImmLS_offset; } - static Instr ImmLSPair(int64_t imm7, unsigned access_size) { - VIXL_ASSERT(IsMultiple(imm7, 1 << access_size)); - int64_t scaled_imm7 = imm7 / (1 << access_size); + static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) { + VIXL_ASSERT(IsMultiple(imm7, 1 << access_size_in_bytes_log2)); + int64_t scaled_imm7 = imm7 / (1 << access_size_in_bytes_log2); VIXL_ASSERT(IsInt7(scaled_imm7)); return TruncateToUint7(scaled_imm7) << ImmLSPair_offset; } @@ -3990,8 +6268,8 @@ class Assembler : public vixl::internal::AssemblerBase { unsigned* n = NULL, unsigned* imm_s = NULL, unsigned* imm_r = NULL); - static bool IsImmLSPair(int64_t offset, unsigned access_size); - static bool IsImmLSScaled(int64_t offset, unsigned access_size); + static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2); + static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2); static bool IsImmLSUnscaled(int64_t offset); static bool IsImmMovn(uint64_t imm, unsigned reg_size); static bool IsImmMovz(uint64_t imm, unsigned reg_size); @@ -4126,6 +6404,30 @@ class Assembler : public 
vixl::internal::AssemblerBase { } } + template <typename T> + static Instr SVESize(const T& rd) { + VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister()); + VIXL_ASSERT(rd.HasLaneSize()); + switch (rd.GetLaneSizeInBytes()) { + case 1: + return SVE_B; + case 2: + return SVE_H; + case 4: + return SVE_S; + case 8: + return SVE_D; + default: + return 0xffffffff; + } + } + + static Instr ImmSVEPredicateConstraint(int pattern) { + VIXL_ASSERT(IsUint5(pattern)); + return (pattern << ImmSVEPredicateConstraint_offset) & + ImmSVEPredicateConstraint_mask; + } + static Instr ImmNEONHLM(int index, int num_bits) { int h, l, m; if (num_bits == 3) { @@ -4277,9 +6579,93 @@ class Assembler : public vixl::internal::AssemblerBase { const MemOperand& addr, Instr op); + // Set `is_load` to false by default, as it's only used in the + // scalar-plus-vector form. + Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2, + int num_regs, + const SVEMemOperand& addr, + bool is_load = false); + + // E.g. st1b, st1h, ... + // This supports both contiguous and scatter stores. + void SVESt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // E.g. ld1b, ld1h, ... + // This supports both contiguous and gather loads. + void SVELd1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed); + + // E.g. ld1rb, ld1rh, ... + void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed); + + // E.g. ldff1b, ldff1h, ... + // This supports both contiguous and gather loads. + void SVELdff1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed); + + // Common code for the helpers above. + void SVELdSt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_signed, + Instr op); + + // Common code for the helpers above. + void SVEScatterGatherHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_load, + bool is_signed, + bool is_first_fault); + + // E.g. st2b, st3h, ... + void SVESt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr); + + // E.g. ld2b, ld3h, ... + void SVELd234Helper(int num_regs, + const ZRegister& zt1, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Common code for the helpers above. + void SVELdSt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr, + Instr op); + + // E.g. ld1qb, ld1qh, ldnt1b, ... + void SVELd1St1ScaImmHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + Instr regoffset_op, + Instr immoffset_op, + int imm_divisor = 1); + void Prefetch(PrefetchOperation op, const MemOperand& addr, LoadStoreScalingOption option = PreferScaledOffset); + void Prefetch(int op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); // TODO(all): The third parameter should be passed by reference but gcc 4.8.2 // reports a bogus uninitialised warning then. 
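A note on the SVESize helper added above: it reduces to placing log2(lane size in bytes) in the two-bit size field at bits <23:22>, the field that the encoding comments later in this patch write as size<23:22>. A minimal standalone sketch of that mapping, with assumed names not taken from the patch (the real code returns the SVE_B/SVE_H/SVE_S/SVE_D constants instead):

    #include <cassert>
    #include <cstdint>

    // Illustrative sketch of Assembler::SVESize: log2(lane size in bytes)
    // shifted into the assumed size-field position, bits <23:22>.
    uint32_t SveSizeFieldSketch(unsigned lane_size_in_bytes) {
      const unsigned kSveSizeOffset = 22;  // assumed field position
      switch (lane_size_in_bytes) {
        case 1: return 0u << kSveSizeOffset;  // .B lanes
        case 2: return 1u << kSveSizeOffset;  // .H lanes
        case 4: return 2u << kSveSizeOffset;  // .S lanes
        case 8: return 3u << kSveSizeOffset;  // .D lanes
        default: assert(false); return 0xffffffff;
      }
    }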
@@ -4287,6 +6673,9 @@ class Assembler : public vixl::internal::AssemblerBase { const Register& rn, const Operand operand, LogicalOp op); + + void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op); + void LogicalImmediate(const Register& rd, const Register& rn, unsigned n, @@ -4306,6 +6695,92 @@ class Assembler : public vixl::internal::AssemblerBase { FlagsUpdate S, AddSubWithCarryOp op); + void CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEIntCompareVectorsOp op); + + void CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm, + SVEIntCompareSignedImmOp op); + + void CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm, + SVEIntCompareUnsignedImmOp op); + + void SVEIntAddSubtractImmUnpredicatedHelper( + SVEIntAddSubtractImm_UnpredicatedOp op, + const ZRegister& zd, + int imm8, + int shift); + + void SVEElementCountToRegisterHelper(Instr op, + const Register& rd, + int pattern, + int multiplier); + + Instr EncodeSVEShiftImmediate(Shift shift_op, + int shift, + int lane_size_in_bits); + + void SVEBitwiseShiftImmediate(const ZRegister& zd, + const ZRegister& zn, + Instr encoded_imm, + SVEBitwiseShiftUnpredicatedOp op); + + void SVEBitwiseShiftImmediatePred(const ZRegister& zdn, + const PRegisterM& pg, + Instr encoded_imm, + SVEBitwiseShiftByImm_PredicatedOp op); + + Instr SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2, + const ZRegister& zm, + int index, + Instr op_h, + Instr op_s, + Instr op_d); + + + void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEPrefetchHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) { + // SVE only supports PLD and PST, not PLI. + VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) || + ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM))); + // Check that we can simply map bits. + VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000); + VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000); + // Remaining operations map directly. + return ((prfop & 0b10000) >> 1) | (prfop & 0b00111); + } // Functions for emulating operands not directly supported by the instruction // set. @@ -4507,12 +6982,16 @@ class Assembler : public vixl::internal::AssemblerBase { NEONShiftImmediateOp op); void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop); + // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8) + // and *shift is either 0 or 8. Otherwise, leave the values unchanged. + void ResolveSVEImm8Shift(int* imm8, int* shift); + Instr LoadStoreStructAddrModeField(const MemOperand& addr); // Encode the specified MemOperand for the specified access size and scaling // preference. 
Instr LoadStoreMemOperand(const MemOperand& addr, - unsigned access_size, + unsigned access_size_in_bytes_log2, LoadStoreScalingOption option); // Link the current (not-yet-emitted) instruction to the specified label, then diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc new file mode 100644 index 00000000..f7cf8b21 --- /dev/null +++ b/src/aarch64/assembler-sve-aarch64.cc @@ -0,0 +1,6489 @@ +// Copyright 2019, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "assembler-aarch64.h" + +namespace vixl { +namespace aarch64 { + +void Assembler::ResolveSVEImm8Shift(int* imm8, int* shift) { + if (*shift < 0) { + VIXL_ASSERT(*shift == -1); + // Derive the shift amount from the immediate. + if (IsInt8(*imm8)) { + *shift = 0; + } else if ((*imm8 % 256) == 0) { + *imm8 /= 256; + *shift = 8; + } + } + + VIXL_ASSERT(IsInt8(*imm8)); + VIXL_ASSERT((*shift == 0) || (*shift == 8)); +} + +// SVEAddressGeneration. + +void Assembler::adr(const ZRegister& zd, const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsVectorPlusVector()); + VIXL_ASSERT( + AreSameLaneSize(zd, addr.GetVectorBase(), addr.GetVectorOffset())); + + int lane_size = zd.GetLaneSizeInBits(); + VIXL_ASSERT((lane_size == kSRegSize) || (lane_size == kDRegSize)); + + int shift_amount = addr.GetShiftAmount(); + VIXL_ASSERT((shift_amount >= 0) && (shift_amount <= 3)); + + Instr op = 0xffffffff; + Instr msz = shift_amount << 10; + SVEOffsetModifier mod = addr.GetOffsetModifier(); + switch (mod) { + case SVE_UXTW: + VIXL_ASSERT(lane_size == kDRegSize); + op = ADR_z_az_d_u32_scaled; + break; + case SVE_SXTW: + VIXL_ASSERT(lane_size == kDRegSize); + op = ADR_z_az_d_s32_scaled; + break; + case SVE_LSL: + case NO_SVE_OFFSET_MODIFIER: + op = (lane_size == kSRegSize) ? 
ADR_z_az_s_same_scaled + : ADR_z_az_d_same_scaled; + break; + default: + VIXL_UNIMPLEMENTED(); + } + Emit(op | msz | Rd(zd) | Rn(addr.GetVectorBase()) | + Rm(addr.GetVectorOffset())); +} + +void Assembler::SVELogicalImmediate(const ZRegister& zdn, + uint64_t imm, + Instr op) { + unsigned bit_n, imm_s, imm_r; + unsigned lane_size = zdn.GetLaneSizeInBits(); + // Check that the immediate can be encoded in the instruction. + if (IsImmLogical(imm, lane_size, &bit_n, &imm_s, &imm_r)) { + Emit(op | Rd(zdn) | SVEBitN(bit_n) | SVEImmRotate(imm_r, lane_size) | + SVEImmSetBits(imm_s, lane_size)); + } else { + VIXL_UNREACHABLE(); + } +} + +void Assembler::and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + SVELogicalImmediate(zd, imm, AND_z_zi); +} + +void Assembler::dupm(const ZRegister& zd, uint64_t imm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + // DUPM_z_i is an SVEBroadcastBitmaskImmOp, but its encoding and constraints + // are similar enough to SVEBitwiseLogicalWithImm_UnpredicatedOp that we can + // use the logical immediate encoder to get the correct behaviour. + SVELogicalImmediate(zd, imm, DUPM_z_i); +} + +void Assembler::eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + SVELogicalImmediate(zd, imm, EOR_z_zi); +} + +void Assembler::orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + SVELogicalImmediate(zd, imm, ORR_z_zi); +} + +// SVEBitwiseLogicalUnpredicated. +void Assembler::and_(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(AND_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::bic(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(BIC_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::eor(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(EOR_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::orr(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(ORR_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEBitwiseShiftPredicated. + +void Assembler::SVEBitwiseShiftImmediatePred( + const ZRegister& zdn, + const PRegisterM& pg, + Instr encoded_imm_and_tsz, + SVEBitwiseShiftByImm_PredicatedOp op) { + Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz) + << 5; + Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22; + Emit(op | tszh | tszl_and_imm | PgLow8(pg) | Rd(zdn)); +} + +void Assembler::asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 0000 100. .... .... .... 
+ // tszh<23:22> | opc<19:18> = 00 | L<17> = 0 | U<16> = 0 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + Instr encoded_imm = + EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASR_z_p_zi); +} + +void Assembler::asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D + // 0000 0100 ..01 1000 100. .... .... .... + // size<23:22> | R<18> = 0 | L<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || + ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && + (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); + Instr op = ASR_z_p_zw; + if (AreSameLaneSize(zd, zn, zm)) { + op = ASR_z_p_zz; + } + Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::asrd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // ASRD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 0100 100. .... .... .... + // tszh<23:22> | opc<19:18> = 01 | L<17> = 0 | U<16> = 0 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + + Instr encoded_imm = + EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASRD_z_p_zi); +} + +void Assembler::asrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0100 100. .... .... .... + // size<23:22> | R<18> = 1 | L<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ASRR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 0011 100. .... .... .... + // tszh<23:22> | opc<19:18> = 00 | L<17> = 1 | U<16> = 1 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + + Instr encoded_imm = + EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSL_z_p_zi); +} + +void Assembler::lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D + // 0000 0100 ..01 1011 100. .... .... .... + // size<23:22> | R<18> = 0 | L<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || + ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && + (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); + Instr op = LSL_z_p_zw; + if (AreSameLaneSize(zd, zn, zm)) { + op = LSL_z_p_zz; + } + Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lslr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0111 100. .... .... .... 
+ // size<23:22> | R<18> = 1 | L<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(LSLR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 0001 100. .... .... .... + // tszh<23:22> | opc<19:18> = 00 | L<17> = 0 | U<16> = 1 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + + Instr encoded_imm = + EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSR_z_p_zi); +} + +void Assembler::lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D + // 0000 0100 ..01 1001 100. .... .... .... + // size<23:22> | R<18> = 0 | L<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || + ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && + (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); + Instr op = LSR_z_p_zw; + if (AreSameLaneSize(zd, zn, zm)) { + op = LSR_z_p_zz; + } + Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lsrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0101 100. .... .... .... + // size<23:22> | R<18> = 1 | L<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(LSRR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +// SVEBitwiseShiftUnpredicated. 
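A note on the shift-immediate scheme used by the encoder that follows: the lane size and the shift amount are folded into a single tsz:imm3 value. LSL #n encodes as lane_size + n (0 <= n < lane_size), while ASR and LSR #n encode as 2 * lane_size - n (1 <= n <= lane_size), so the position of the highest set bit recovers the lane size on decode. A small illustrative sketch of the computation, not part of the patch:

    // Sketch of the combined tsz:imm3 value produced by
    // Assembler::EncodeSVEShiftImmediate below.
    int EncodeSveShiftImmSketch(bool is_lsl, int shift, int lane_size_in_bits) {
      if (is_lsl) {
        return lane_size_in_bits + shift;      // LSL: 0 <= shift < lane size
      }
      return (2 * lane_size_in_bits) - shift;  // ASR/LSR: 1 <= shift <= lane size
    }
    // e.g. for .B lanes (8 bits): LSL #3 -> 11 (0b01011), LSR #8 -> 8 (0b01000).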
+ +Instr Assembler::EncodeSVEShiftImmediate(Shift shift_op, + int shift, + int lane_size_in_bits) { + if (shift_op == LSL) { + VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits)); + return lane_size_in_bits + shift; + } + + VIXL_ASSERT((shift_op == ASR) || (shift_op == LSR)); + VIXL_ASSERT((shift > 0) && (shift <= lane_size_in_bits)); + return (2 * lane_size_in_bits) - shift; +} + +void Assembler::SVEBitwiseShiftImmediate(const ZRegister& zd, + const ZRegister& zn, + Instr encoded_imm_and_tsz, + SVEBitwiseShiftUnpredicatedOp op) { + Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz) + << 16; + Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22; + Emit(op | tszh | tszl_and_imm | Rd(zd) | Rn(zn)); +} + +void Assembler::asr(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + Instr encoded_imm = + EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, ASR_z_zi); +} + +void Assembler::asr(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(ASR_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::lsl(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr encoded_imm = + EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSL_z_zi); +} + +void Assembler::lsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(LSL_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::lsr(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr encoded_imm = + EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSR_z_zi); +} + +void Assembler::lsr(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(LSR_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEElementCount. + +#define VIXL_SVE_INC_DEC_LIST(V) \ + V(cntb, CNTB_r_s) \ + V(cnth, CNTH_r_s) \ + V(cntw, CNTW_r_s) \ + V(cntd, CNTD_r_s) \ + V(decb, DECB_r_rs) \ + V(dech, DECH_r_rs) \ + V(decw, DECW_r_rs) \ + V(decd, DECD_r_rs) \ + V(incb, INCB_r_rs) \ + V(inch, INCH_r_rs) \ + V(incw, INCW_r_rs) \ + V(incd, INCD_r_rs) \ + V(sqdecb, SQDECB_r_rs_x) \ + V(sqdech, SQDECH_r_rs_x) \ + V(sqdecw, SQDECW_r_rs_x) \ + V(sqdecd, SQDECD_r_rs_x) \ + V(sqincb, SQINCB_r_rs_x) \ + V(sqinch, SQINCH_r_rs_x) \ + V(sqincw, SQINCW_r_rs_x) \ + V(sqincd, SQINCD_r_rs_x) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& rdn, int pattern, int multiplier) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(rdn.IsX()); \ + Emit(OP | Rd(rdn) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_INC_DEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +#define VIXL_SVE_UQINC_UQDEC_LIST(V) \ + V(uqdecb, (rdn.IsX() ? 
UQDECB_r_rs_x : UQDECB_r_rs_uw)) \ + V(uqdech, (rdn.IsX() ? UQDECH_r_rs_x : UQDECH_r_rs_uw)) \ + V(uqdecw, (rdn.IsX() ? UQDECW_r_rs_x : UQDECW_r_rs_uw)) \ + V(uqdecd, (rdn.IsX() ? UQDECD_r_rs_x : UQDECD_r_rs_uw)) \ + V(uqincb, (rdn.IsX() ? UQINCB_r_rs_x : UQINCB_r_rs_uw)) \ + V(uqinch, (rdn.IsX() ? UQINCH_r_rs_x : UQINCH_r_rs_uw)) \ + V(uqincw, (rdn.IsX() ? UQINCW_r_rs_x : UQINCW_r_rs_uw)) \ + V(uqincd, (rdn.IsX() ? UQINCD_r_rs_x : UQINCD_r_rs_uw)) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& rdn, int pattern, int multiplier) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + Emit(OP | Rd(rdn) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_UQINC_UQDEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +#define VIXL_SVE_SQX_INC_DEC_LIST(V) \ + V(sqdecb, SQDECB) \ + V(sqdech, SQDECH) \ + V(sqdecw, SQDECW) \ + V(sqdecd, SQDECD) \ + V(sqincb, SQINCB) \ + V(sqinch, SQINCH) \ + V(sqincw, SQINCW) \ + V(sqincd, SQINCD) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& xd, \ + const Register& wn, \ + int pattern, \ + int multiplier) { \ + USE(wn); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(wn.IsW() && xd.Is(wn.X())); \ + Emit(OP##_r_rs_sx | Rd(xd) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_SQX_INC_DEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +#define VIXL_SVE_INC_DEC_VEC_LIST(V) \ + V(dech, DEC, H) \ + V(decw, DEC, W) \ + V(decd, DEC, D) \ + V(inch, INC, H) \ + V(incw, INC, W) \ + V(incd, INC, D) \ + V(sqdech, SQDEC, H) \ + V(sqdecw, SQDEC, W) \ + V(sqdecd, SQDEC, D) \ + V(sqinch, SQINC, H) \ + V(sqincw, SQINC, W) \ + V(sqincd, SQINC, D) \ + V(uqdech, UQDEC, H) \ + V(uqdecw, UQDEC, W) \ + V(uqdecd, UQDEC, D) \ + V(uqinch, UQINC, H) \ + V(uqincw, UQINC, W) \ + V(uqincd, UQINC, D) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP, T) \ + void Assembler::FN(const ZRegister& zdn, int pattern, int multiplier) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(zdn.GetLaneSizeInBytes() == k##T##RegSizeInBytes); \ + Emit(OP##T##_z_zs | Rd(zdn) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_INC_DEC_VEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +// SVEFPAccumulatingReduction. + +void Assembler::fadda(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + // FADDA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> + // 0110 0101 ..01 1000 001. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Vdn<4:0> + + USE(vn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.Is(vn)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zm.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zm, vd)); + + Emit(FADDA_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); +} + +// SVEFPArithmetic_Predicated. + +void Assembler::fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 1000 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 1000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1000 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FADD_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0000 100. .... .... .... + // size<23:22> | opc<19:16> = 0000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FADD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 1101 100. .... .... .... + // size<23:22> | opc<19:16> = 1101 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 1100 100. .... .... .... + // size<23:22> | opc<19:16> = 1100 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1110 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMAX_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0110 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 0110 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1100 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMAXNM_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0100 100. .... .... .... + // size<23:22> | opc<19:16> = 0100 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMAXNM_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1111 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 111 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMIN_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0111 100. .... .... .... + // size<23:22> | opc<19:16> = 0111 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1101 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMINNM_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0101 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 0101 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMINNM_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1010 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 010 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 2.0)); + + Instr i1 = (imm == 2.0) ? (1 << 5) : 0; + Emit(FMUL_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0010 100. .... .... .... + // size<23:22> | opc<19:16> = 0010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMUL_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmulx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 1010 100. .... .... .... + // size<23:22> | opc<19:16> = 1010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMULX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fscale(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 1001 100. .... .... .... + // size<23:22> | opc<19:16> = 1001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSCALE_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1001 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FSUB_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0001 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 0001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSUB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1011 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FSUBR_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0011 100. .... .... .... + // size<23:22> | opc<19:16> = 0011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSUBR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::ftmad(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm3) { + // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm> + // 0110 0101 ..01 0... 1000 00.. .... .... + // size<23:22> | imm3<18:16> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FTMAD_z_zzi | SVESize(zd) | Rd(zd) | Rn(zm) | + ImmUnsignedField<18, 16>(imm3)); +} + +// SVEFPArithmeticUnpredicated. + +void Assembler::fadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0000 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0000 10.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 010 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMUL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::frecps(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0001 10.. .... .... 
+ // size<23:22> | Zm<20:16> | opc<12:10> = 110 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRECPS_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::frsqrts(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0001 11.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 111 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRSQRTS_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::fsub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0000 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ftsmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0000 11.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 011 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FTSMUL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEFPCompareVectors. + +void Assembler::facge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 110. .... ...1 .... + // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FACGE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::facgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 111. .... ...1 .... + // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FACGT_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 011. .... ...0 .... + // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | + // o3<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMEQ_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. 
.... 010. .... ...0 .... + // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMGE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 010. .... ...1 .... + // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMGT_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 011. .... ...1 .... + // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMNE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmuo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 110. .... ...0 .... + // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMUO_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +// SVEFPCompareWithZero. + +void Assembler::fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0010 001. .... ...0 .... + // size<23:22> | eq<17> = 1 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMEQ_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0000 001. .... ...0 .... + // size<23:22> | eq<17> = 0 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMGE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0000 001. .... ...1 .... 
+ // size<23:22> | eq<17> = 0 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMGT_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0001 001. .... ...1 .... + // size<23:22> | eq<17> = 0 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMLE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmlt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0001 001. .... ...0 .... + // size<23:22> | eq<17> = 0 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMLT_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0011 001. .... ...0 .... + // size<23:22> | eq<17> = 1 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMNE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +// SVEFPComplexAddition. + +void Assembler::fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const> + // 0110 0100 ..00 000. 100. .... .... .... + // size<23:22> | rot<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((rot == 90) || (rot == 270)); + + Instr rotate_bit = (rot == 90) ? 0 : (1 << 16); + Emit(FCADD_z_p_zz | rotate_bit | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +// SVEFPComplexMulAdd. + +void Assembler::fcmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const> + // 0110 0100 ..0. .... 0... .... .... .... + // size<23:22> | Zm<20:16> | rot<14:13> | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 13; + Emit(FCMLA_z_p_zzz | rotate_bit | SVESize(zda) | Rd(zda) | PgLow8(pg) | + Rn(zn) | Rm(zm)); +} + +// SVEFPComplexMulAddIndex. 
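The indexed FCMLA form that follows must pack both the multiplicand register and the lane index into the five bits <20:16>, which is why the register number is restricted: z0-z7 with a two-bit index for H lanes, z0-z15 with a one-bit index for S lanes. An illustrative sketch of the H-lane packing, mirroring the (index << 19) | Rx<18, 16>(zm) expression below (not part of the patch):

    #include <cstdint>

    // Sketch: pack a restricted Zm (z0-z7) and a lane index (0-3) into bits
    // <20:16> for the half-precision indexed form: i2<20:19> : Zm<18:16>.
    uint32_t PackZmAndIndexSketch(uint32_t zm_code, uint32_t index) {
      return (index << 19) | (zm_code << 16);
    }
    // e.g. zm = z5, index = 2 -> bits <20:16> = 0b10101.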
+ +void Assembler::fcmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + VIXL_ASSERT(index >= 0); + + int lane_size = zda.GetLaneSizeInBytes(); + + Instr zm_and_idx = 0; + Instr op = FCMLA_z_zzzi_h; + if (lane_size == kHRegSizeInBytes) { + // Zm<18:16> | i2<20:19> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 3)); + zm_and_idx = (index << 19) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i1<20> + VIXL_ASSERT(lane_size == kSRegSizeInBytes); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 1)); + zm_and_idx = (index << 20) | Rx<19, 16>(zm); + op = FCMLA_z_zzzi_s; + } + + Instr rotate_bit = (rot / 90) << 10; + Emit(op | zm_and_idx | rotate_bit | Rd(zda) | Rn(zn)); +} + +// SVEFPFastReduction. + +void Assembler::faddv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FADDV <V><d>, <Pg>, <Zn>.<T> + // 0110 0101 ..00 0000 001. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FADDV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fmaxnmv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMAXNMV <V><d>, <Pg>, <Zn>.<T> + // 0110 0101 ..00 0100 001. .... .... .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMAXNMV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fmaxv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMAXV <V><d>, <Pg>, <Zn>.<T> + // 0110 0101 ..00 0110 001. .... .... .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMAXV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fminnmv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMINNMV <V><d>, <Pg>, <Zn>.<T> + // 0110 0101 ..00 0101 001. .... .... .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMINNMV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fminv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMINV <V><d>, <Pg>, <Zn>.<T> + // 0110 0101 ..00 0111 001. .... .... .... + // size<23:22> | opc<18:16> = 111 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMINV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +// SVEFPMulAdd. + +void Assembler::fmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0110 0101 ..1. 
.... 100. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 00 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +void Assembler::fmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..1. .... 000. .... .... .... + // size<23:22> | Zm<20:16> | opc<14:13> = 00 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..1. .... 001. .... .... .... + // size<23:22> | Zm<20:16> | opc<14:13> = 01 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0110 0101 ..1. .... 101. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 01 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +void Assembler::fnmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0110 0101 ..1. .... 110. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 10 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +void Assembler::fnmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..1. .... 010. .... .... .... + // size<23:22> | Zm<20:16> | opc<14:13> = 10 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fnmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..1. .... 011. .... .... .... 
+ // size<23:22> | Zm<20:16> | opc<14:13> = 11 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fnmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0110 0101 ..1. .... 111. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 11 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +Instr Assembler::SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2, + const ZRegister& zm, + int index, + Instr op_h, + Instr op_s, + Instr op_d) { + Instr size = lane_size_in_bytes_log2 << SVESize_offset; + Instr zm_with_index = Rm(zm); + Instr op = 0xffffffff; + // Allowable register number and lane index depend on the lane size. + switch (lane_size_in_bytes_log2) { + case kHRegSizeInBytesLog2: + VIXL_ASSERT(zm.GetCode() <= 7); + VIXL_ASSERT(IsUint3(index)); + // For H-sized lanes, size is encoded as 0b0x, where x is used as the top + // bit of the index. So, if index is less than four, the top bit of index + // is zero, and therefore size is 0b00. Otherwise, it's 0b01, the usual + // encoding for H-sized lanes. + if (index < 4) size = 0; + // Top two bits of "zm" encode the index. + zm_with_index |= (index & 3) << (Rm_offset + 3); + op = op_h; + break; + case kSRegSizeInBytesLog2: + VIXL_ASSERT(zm.GetCode() <= 7); + VIXL_ASSERT(IsUint2(index)); + // Top two bits of "zm" encode the index. + zm_with_index |= (index & 3) << (Rm_offset + 3); + op = op_s; + break; + case kDRegSizeInBytesLog2: + VIXL_ASSERT(zm.GetCode() <= 15); + VIXL_ASSERT(IsUint1(index)); + // Top bit of "zm" encodes the index. + zm_with_index |= (index & 1) << (Rm_offset + 4); + op = op_d; + break; + default: + VIXL_UNIMPLEMENTED(); + } + return op | zm_with_index | size; +} + +// SVEFPMulAddIndex. + +void Assembler::fmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + // The encodings of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + FMLA_z_zzzi_h, + FMLA_z_zzzi_s, + FMLA_z_zzzi_d); + + Emit(synthesized_op | Rd(zda) | Rn(zn)); +} + +void Assembler::fmls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + // The encodings of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + FMLS_z_zzzi_h, + FMLS_z_zzzi_s, + FMLS_z_zzzi_d); + + Emit(synthesized_op | Rd(zda) | Rn(zn)); +} + +// SVEFPMulIndex. + +// This prototype maps to 3 instruction encodings: +void Assembler::fmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned index) { + // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T>[<imm>] + // 0110 0100 ..1. .... 0010 00.. .... .... 
+ // size<23:22> | opc<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + // The encoding of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEFPMulIndexHelper(zd.GetLaneSizeInBytesLog2(), + zm, + index, + FMUL_z_zzi_h, + FMUL_z_zzi_s, + FMUL_z_zzi_d); + + Emit(synthesized_op | Rd(zd) | Rn(zn)); +} + +// SVEFPUnaryOpPredicated. + +void Assembler::fcvt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVT_z_p_z_h2s; + break; + case kDRegSizeInBytes: + op = FCVT_z_p_z_h2d; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVT_z_p_z_s2h; + break; + case kDRegSizeInBytes: + op = FCVT_z_p_z_s2d; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVT_z_p_z_d2h; + break; + case kSRegSizeInBytes: + op = FCVT_z_p_z_d2s; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtzs(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVTZS_z_p_z_fp162h; + break; + case kSRegSizeInBytes: + op = FCVTZS_z_p_z_fp162w; + break; + case kDRegSizeInBytes: + op = FCVTZS_z_p_z_fp162x; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZS_z_p_z_s2w; + break; + case kDRegSizeInBytes: + op = FCVTZS_z_p_z_s2x; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZS_z_p_z_d2w; + break; + case kDRegSizeInBytes: + op = FCVTZS_z_p_z_d2x; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtzu(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVTZU_z_p_z_fp162h; + break; + case kSRegSizeInBytes: + op = FCVTZU_z_p_z_fp162w; + break; + case kDRegSizeInBytes: + op = FCVTZU_z_p_z_fp162x; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZU_z_p_z_s2w; + break; + case kDRegSizeInBytes: + op = FCVTZU_z_p_z_s2x; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZU_z_p_z_d2w; + break; + case kDRegSizeInBytes: + op = FCVTZU_z_p_z_d2x; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frecpx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0110 0101 ..00 1100 101. .... .... .... 
+ // size<23:22> | opc<17:16> = 00 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRECPX_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frinta(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTA_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frinti(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTI_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTM_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintn(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTN_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTP_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTX_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintz(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTZ_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fsqrt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FSQRT <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0110 0101 ..00 1101 101. .... .... .... 
+ // size<23:22> | opc<17:16> = 01 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSQRT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::scvtf(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = SCVTF_z_p_z_h2fp16; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = SCVTF_z_p_z_w2fp16; + break; + case kSRegSizeInBytes: + op = SCVTF_z_p_z_w2s; + break; + case kDRegSizeInBytes: + op = SCVTF_z_p_z_w2d; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = SCVTF_z_p_z_x2fp16; + break; + case kSRegSizeInBytes: + op = SCVTF_z_p_z_x2s; + break; + case kDRegSizeInBytes: + op = SCVTF_z_p_z_x2d; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::ucvtf(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = UCVTF_z_p_z_h2fp16; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = UCVTF_z_p_z_w2fp16; + break; + case kSRegSizeInBytes: + op = UCVTF_z_p_z_w2s; + break; + case kDRegSizeInBytes: + op = UCVTF_z_p_z_w2d; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = UCVTF_z_p_z_x2fp16; + break; + case kSRegSizeInBytes: + op = UCVTF_z_p_z_x2s; + break; + case kDRegSizeInBytes: + op = UCVTF_z_p_z_x2d; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +// SVEFPUnaryOpUnpredicated. + +void Assembler::frecpe(const ZRegister& zd, const ZRegister& zn) { + // FRECPE <Zd>.<T>, <Zn>.<T> + // 0110 0101 ..00 1110 0011 00.. .... .... + // size<23:22> | opc<18:16> = 110 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRECPE_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::frsqrte(const ZRegister& zd, const ZRegister& zn) { + // FRSQRTE <Zd>.<T>, <Zn>.<T> + // 0110 0101 ..00 1111 0011 00.. .... .... + // size<23:22> | opc<18:16> = 111 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRSQRTE_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +// SVEIncDecByPredicateCount. + +void Assembler::decp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // DECP <Xdn>, <Pg>.<T> + // 0010 0101 ..10 1101 1000 100. .... .... + // size<23:22> | op<17> = 0 | D<16> = 1 | opc2<10:9> = 00 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rdn.IsX()); + + Emit(DECP_r_p_r | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::decp(const ZRegister& zdn, const PRegister& pg) { + // DECP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1101 1000 000. .... .... 
+ // size<23:22> | op<17> = 0 | D<16> = 1 | opc2<10:9> = 00 | Pg<8:5> | + // Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(DECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::incp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // INCP <Xdn>, <Pg>.<T> + // 0010 0101 ..10 1100 1000 100. .... .... + // size<23:22> | op<17> = 0 | D<16> = 0 | opc2<10:9> = 00 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rdn.IsX()); + + Emit(INCP_r_p_r | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::incp(const ZRegister& zdn, const PRegister& pg) { + // INCP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1100 1000 000. .... .... + // size<23:22> | op<17> = 0 | D<16> = 0 | opc2<10:9> = 00 | Pg<8:5> | + // Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(INCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::sqdecp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn) { + // SQDECP <Xdn>, <Pg>.<T>, <Wdn> + // 0010 0101 ..10 1010 1000 100. .... .... + // size<23:22> | D<17> = 1 | U<16> = 0 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + USE(wn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX() && wn.IsW() && xd.Aliases(wn)); + + Emit(SQDECP_r_p_r_sx | SVESize(pg) | Rd(xd) | Rx<8, 5>(pg)); +} + +void Assembler::sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg) { + // SQDECP <Xdn>, <Pg>.<T> + // 0010 0101 ..10 1010 1000 110. .... .... + // size<23:22> | D<17> = 1 | U<16> = 0 | sf<10> = 1 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xdn.IsX()); + + Emit(SQDECP_r_p_r_x | SVESize(pg) | Rd(xdn) | Rx<8, 5>(pg)); +} + +void Assembler::sqdecp(const ZRegister& zdn, const PRegister& pg) { + // SQDECP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1010 1000 000. .... .... + // size<23:22> | D<17> = 1 | U<16> = 0 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(SQDECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::sqincp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn) { + // SQINCP <Xdn>, <Pg>.<T>, <Wdn> + // 0010 0101 ..10 1000 1000 100. .... .... + // size<23:22> | D<17> = 0 | U<16> = 0 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + USE(wn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX() && wn.IsW() && xd.Aliases(wn)); + + Emit(SQINCP_r_p_r_sx | SVESize(pg) | Rd(xd) | Rx<8, 5>(pg)); +} + +void Assembler::sqincp(const Register& xdn, const PRegisterWithLaneSize& pg) { + // SQINCP <Xdn>, <Pg>.<T> + // 0010 0101 ..10 1000 1000 110. .... .... + // size<23:22> | D<17> = 0 | U<16> = 0 | sf<10> = 1 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xdn.IsX()); + + Emit(SQINCP_r_p_r_x | SVESize(pg) | Rd(xdn) | Rx<8, 5>(pg)); +} + +void Assembler::sqincp(const ZRegister& zdn, const PRegister& pg) { + // SQINCP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1000 1000 000. .... .... 
+ // size<23:22> | D<17> = 0 | U<16> = 0 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(SQINCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // UQDECP <Wdn>, <Pg>.<T> + // UQDECP <Xdn>, <Pg>.<T> + // 0010 0101 ..10 1011 1000 10.. .... .... + // size<23:22> | D<17> = 1 | U<16> = 1 | sf<10> | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Instr op = rdn.IsX() ? UQDECP_r_p_r_x : UQDECP_r_p_r_uw; + Emit(op | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::uqdecp(const ZRegister& zdn, const PRegister& pg) { + // UQDECP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1011 1000 000. .... .... + // size<23:22> | D<17> = 1 | U<16> = 1 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(UQDECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::uqincp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // UQINCP <Wdn>, <Pg>.<T> + // 0010 0101 ..10 1001 1000 100. .... .... + // size<23:22> | D<17> = 0 | U<16> = 1 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Instr op = rdn.IsX() ? UQINCP_r_p_r_x : UQINCP_r_p_r_uw; + Emit(op | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::uqincp(const ZRegister& zdn, const PRegister& pg) { + // UQINCP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1001 1000 000. .... .... + // size<23:22> | D<17> = 0 | U<16> = 1 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(UQINCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +// SVEIndexGeneration. + +void Assembler::index(const ZRegister& zd, int start, int step) { + // INDEX <Zd>.<T>, #<imm1>, #<imm2> + // 0000 0100 ..1. .... 0100 00.. .... .... + // size<23:22> | step<20:16> | start<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(INDEX_z_ii | SVESize(zd) | ImmField<20, 16>(step) | + ImmField<9, 5>(start) | Rd(zd)); +} + +void Assembler::index(const ZRegister& zd, + const Register& rn, + const Register& rm) { + // INDEX <Zd>.<T>, <R><n>, <R><m> + // 0000 0100 ..1. .... 0100 11.. .... .... + // size<23:22> | Rm<20:16> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + VIXL_ASSERT(static_cast<unsigned>(rm.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(INDEX_z_rr | SVESize(zd) | Rd(zd) | Rn(rn) | Rm(rm)); +} + +void Assembler::index(const ZRegister& zd, const Register& rn, int imm5) { + // INDEX <Zd>.<T>, <R><n>, #<imm> + // 0000 0100 ..1. .... 0100 01.. .... .... + // size<23:22> | imm5<20:16> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(INDEX_z_ri | SVESize(zd) | Rd(zd) | Rn(rn) | ImmField<20, 16>(imm5)); +} + +void Assembler::index(const ZRegister& zd, int imm5, const Register& rm) { + // INDEX <Zd>.<T>, #<imm>, <R><m> + // 0000 0100 ..1. .... 0100 10.. .... .... 
+ // size<23:22> | Rm<20:16> | imm5<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast<unsigned>(rm.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(INDEX_z_ir | SVESize(zd) | Rd(zd) | ImmField<9, 5>(imm5) | Rm(rm)); +} + +// SVEIntArithmeticUnpredicated. + +void Assembler::add(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0000 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0001 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 100 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SQADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqsub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0001 10.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 110 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SQSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0000 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uqadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0001 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 101 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UQADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uqsub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0001 11.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 111 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UQSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEIntBinaryArithmeticPredicated. + +void Assembler::add(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 0000 000. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(ADD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::and_(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 1010 000. .... .... .... 
+ // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(AND_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::bic(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 1011 000. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(BIC_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::eor(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 1001 000. .... .... .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(EOR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::mul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // MUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0000 000. .... .... .... + // size<23:22> | H<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(MUL_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::orr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 1000 000. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(ORR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1100 000. .... .... .... + // size<23:22> | opc<18:17> = 10 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0100 000. .... .... .... + // size<23:22> | R<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(SDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0110 000. .... .... .... 
+ // size<23:22> | R<17> = 1 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(SDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::smax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1000 000. .... .... .... + // size<23:22> | opc<18:17> = 00 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::smin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1010 000. .... .... .... + // size<23:22> | opc<18:17> = 01 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::smulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMULH <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0010 000. .... .... .... + // size<23:22> | H<17> = 1 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SMULH_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 0001 000. .... .... .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SUB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::subr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 0011 000. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SUBR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1101 000. .... .... .... + // size<23:22> | opc<18:17> = 10 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0101 000. .... .... .... 
+ // size<23:22> | R<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(UDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::udivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0111 000. .... .... .... + // size<23:22> | R<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(UDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1001 000. .... .... .... + // size<23:22> | opc<18:17> = 00 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1011 000. .... .... .... + // size<23:22> | opc<18:17> = 01 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMULH <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0011 000. .... .... .... + // size<23:22> | H<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UMULH_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +// SVEIntCompareScalars. + +void Assembler::ctermeq(const Register& rn, const Register& rm) { + // CTERMEQ <R><n>, <R><m> + // 0010 0101 1.1. .... 0010 00.. ...0 0000 + // op<23> = 1 | sz<22> | Rm<20:16> | Rn<9:5> | ne<4> = 0 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sz = rn.Is64Bits() ? 0x00400000 : 0x00000000; + + Emit(CTERMEQ_rr | sz | Rn(rn) | Rm(rm)); +} + +void Assembler::ctermne(const Register& rn, const Register& rm) { + // CTERMNE <R><n>, <R><m> + // 0010 0101 1.1. .... 0010 00.. ...1 0000 + // op<23> = 1 | sz<22> | Rm<20:16> | Rn<9:5> | ne<4> = 1 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sz = rn.Is64Bits() ? 0x00400000 : 0x00000000; + + Emit(CTERMNE_rr | sz | Rn(rn) | Rm(rm)); +} + +void Assembler::whilele(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELE <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 01.. ...1 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 0 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 
0x00001000 : 0x00000000; + + Emit(WHILELE_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilelo(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELO <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 11.. ...0 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 1 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(WHILELO_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilels(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELS <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 11.. ...1 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 1 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(WHILELS_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilelt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELT <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 01.. ...0 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 0 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(WHILELT_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEIntCompareVectorsOp op) { + Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm, + SVEIntCompareSignedImmOp op) { + Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm)); +} + +void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm, + SVEIntCompareUnsignedImmOp op) { + Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | + ImmUnsignedField<20, 14>(imm)); +} + +void Assembler::cmp(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + switch (cond) { + case eq: + cmpeq(pd, pg, zn, zm); + break; + case ge: + cmpge(pd, pg, zn, zm); + break; + case gt: + cmpgt(pd, pg, zn, zm); + break; + case le: + cmple(pd, pg, zn, zm); + break; + case lt: + cmplt(pd, pg, zn, zm); + break; + case ne: + cmpne(pd, pg, zn, zm); + break; + case hi: + cmphi(pd, pg, zn, zm); + break; + case hs: + cmphs(pd, pg, zn, zm); + break; + case lo: + cmplo(pd, pg, zn, zm); + break; + case ls: + cmpls(pd, pg, zn, zm); + break; + default: + VIXL_UNREACHABLE(); + } +} + +// SVEIntCompareSignedImm. + +void Assembler::cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 100. .... ...0 .... 
+ // size<23:22> | imm5<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPEQ_p_p_zi); +} + +void Assembler::cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 000. .... ...0 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPGE_p_p_zi); +} + +void Assembler::cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 000. .... ...1 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPGT_p_p_zi); +} + +void Assembler::cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 001. .... ...1 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> + // | ne<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPLE_p_p_zi); +} + +void Assembler::cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 001. .... ...0 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> + // | ne<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPLT_p_p_zi); +} + +void Assembler::cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 100. .... ...1 .... + // size<23:22> | imm5<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPNE_p_p_zi); +} + +// SVEIntCompareUnsignedImm. + +void Assembler::cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0100 ..1. .... ..0. .... ...1 .... + // size<23:22> | imm7<20:14> | lt<13> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPHI_p_p_zi); +} + +void Assembler::cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0100 ..1. .... ..0. .... ...0 .... 
+ // size<23:22> | imm7<20:14> | lt<13> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPHS_p_p_zi); +} + +void Assembler::cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0100 ..1. .... ..1. .... ...0 .... + // size<23:22> | imm7<20:14> | lt<13> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPLO_p_p_zi); +} + +void Assembler::cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0100 ..1. .... ..1. .... ...1 .... + // size<23:22> | imm7<20:14> | lt<13> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPLS_p_p_zi); +} + +// SVEIntCompareVectors. + +// This prototype maps to 2 instruction encodings: +// CMPEQ_p_p_zw +// CMPEQ_p_p_zz +void Assembler::cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPEQ_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPEQ_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPGE_p_p_zw +// CMPGE_p_p_zz +void Assembler::cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPGE_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPGE_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPGT_p_p_zw +// CMPGT_p_p_zz +void Assembler::cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPGT_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPGT_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPHI_p_p_zw +// CMPHI_p_p_zz +void Assembler::cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPHI_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPHI_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPHS_p_p_zw +// CMPHS_p_p_zz +void Assembler::cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPHS_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPHS_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +void 
Assembler::cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmpge(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLE_p_p_zw); +} + +void Assembler::cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmphi(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLO_p_p_zw); +} + +void Assembler::cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmphs(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLS_p_p_zw); +} + +void Assembler::cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmpgt(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLT_p_p_zw); +} + +// This prototype maps to 2 instruction encodings: +// CMPNE_p_p_zw +// CMPNE_p_p_zz +void Assembler::cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPNE_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPNE_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// SVEIntMiscUnpredicated. + +void Assembler::fexpa(const ZRegister& zd, const ZRegister& zn) { + // FEXPA <Zd>.<T>, <Zn>.<T> + // 0000 0100 ..10 0000 1011 10.. .... .... + // size<23:22> | opc<20:16> = 00000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FEXPA_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::ftssel(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 1011 00.. .... .... + // size<23:22> | Zm<20:16> | op<10> = 0 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FTSSEL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::movprfx(const ZRegister& zd, const ZRegister& zn) { + // MOVPRFX <Zd>, <Zn> + // 0000 0100 0010 0000 1011 11.. .... .... + // opc<23:22> = 00 | opc2<20:16> = 00000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(MOVPRFX_z_z | Rd(zd) | Rn(zn)); +} + +// SVEIntMulAddPredicated. + +void Assembler::mad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // MAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0000 0100 ..0. .... 110. .... .... .... 
+ // size<23:22> | Zm<20:16> | op<13> = 0 | Pg<12:10> | Za<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + + Emit(MAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rm(zm) | Rn(za)); +} + +void Assembler::mla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // MLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..0. .... 010. .... .... .... + // size<23:22> | Zm<20:16> | op<13> = 0 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(MLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::mls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // MLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..0. .... 011. .... .... .... + // size<23:22> | Zm<20:16> | op<13> = 1 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(MLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::msb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // MSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0000 0100 ..0. .... 111. .... .... .... + // size<23:22> | Zm<20:16> | op<13> = 1 | Pg<12:10> | Za<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + + Emit(MSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rm(zm) | Rn(za)); +} + +// SVEIntMulAddUnpredicated. + +void Assembler::sdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zm, zn)); + + Emit(SDOT_z_zzz | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::udot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zm, zn)); + + Emit(UDOT_z_zzz | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// SVEIntReduction. + +void Assembler::andv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(ANDV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::eorv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(EORV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::movprfx(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn) { + // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> + // 0000 0100 ..01 000. 001. .... .... .... + // size<23:22> | opc<18:17> = 00 | M<16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + VIXL_ASSERT(!pg.HasLaneSize()); + + Instr m = pg.IsMerging() ? 
0x00010000 : 0x00000000; + Emit(MOVPRFX_z_p_z | SVESize(zd) | m | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::orv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(ORV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::saddv(const VRegister& dd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(SADDV_r_p_z | SVESize(zn) | Rd(dd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::smaxv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(SMAXV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sminv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(SMINV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uaddv(const VRegister& dd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(UADDV_r_p_z | SVESize(zn) | Rd(dd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::umaxv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(UMAXV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uminv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(UMINV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +// SVEIntUnaryArithmeticPredicated. + +void Assembler::abs(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0110 101. .... .... .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(ABS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1000 101. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CLS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::clz(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1001 101. .... .... .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CLZ_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cnot(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1011 101. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CNOT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cnt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1010 101. .... .... .... 
+ // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CNT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fabs(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1100 101. .... .... .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FABS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fneg(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1101 101. .... .... .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNEG_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::neg(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0111 101. .... .... .... + // size<23:22> | opc<18:16> = 111 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(NEG_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::not_(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1110 101. .... .... .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(NOT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sxtb(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0000 101. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kBRegSizeInBytes); + + Emit(SXTB_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sxth(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0010 101. .... .... .... + // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kHRegSizeInBytes); + + Emit(SXTH_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sxtw(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SXTW <Zd>.D, <Pg>/M, <Zn>.D + // 0000 0100 ..01 0100 101. .... .... .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kSRegSizeInBytes); + + Emit(SXTW_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uxtb(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0001 101. .... .... .... 
+ // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kBRegSizeInBytes); + + Emit(UXTB_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uxth(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0011 101. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kHRegSizeInBytes); + + Emit(UXTH_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uxtw(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // UXTW <Zd>.D, <Pg>/M, <Zn>.D + // 0000 0100 ..01 0101 101. .... .... .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kSRegSizeInBytes); + + Emit(UXTW_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +// SVEIntWideImmPredicated. + +void Assembler::cpy(const ZRegister& zd, + const PRegister& pg, + int imm8, + int shift) { + // CPY <Zd>.<T>, <Pg>/<ZM>, #<imm>{, <shift>} + // 0000 0101 ..01 .... 0... .... .... .... + // size<23:22> | Pg<19:16> | M<14> | sh<13> | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + + ResolveSVEImm8Shift(&imm8, &shift); + + Instr sh = (shift > 0) ? (1 << 13) : 0; + Instr m = pg.IsMerging() ? (1 << 14) : 0; + Emit(CPY_z_p_i | m | sh | SVESize(zd) | Rd(zd) | Pg<19, 16>(pg) | + ImmField<12, 5>(imm8)); +} + +void Assembler::fcpy(const ZRegister& zd, const PRegisterM& pg, double imm) { + // FCPY <Zd>.<T>, <Pg>/M, #<const> + // 0000 0101 ..01 .... 110. .... .... .... + // size<23:22> | Pg<19:16> | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Instr imm_field = ImmUnsignedField<12, 5>(FP64ToImm8(imm)); + Emit(FCPY_z_p_i | SVESize(zd) | Rd(zd) | Pg<19, 16>(pg) | imm_field); +} + +// SVEIntAddSubtractImmUnpredicated. + +void Assembler::SVEIntAddSubtractImmUnpredicatedHelper( + SVEIntAddSubtractImm_UnpredicatedOp op, + const ZRegister& zd, + int imm8, + int shift) { + if (shift < 0) { + VIXL_ASSERT(shift == -1); + // Derive the shift amount from the immediate. + if (IsUint8(imm8)) { + shift = 0; + } else if (IsUint16(imm8) && ((imm8 % 256) == 0)) { + imm8 /= 256; + shift = 8; + } + } + + VIXL_ASSERT(IsUint8(imm8)); + VIXL_ASSERT((shift == 0) || (shift == 8)); + + Instr shift_bit = (shift > 0) ? (1 << 13) : 0; + Emit(op | SVESize(zd) | Rd(zd) | shift_bit | ImmUnsignedField<12, 5>(imm8)); +} + +void Assembler::add(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0000 11.. .... .... .... + // size<23:22> | opc<18:16> = 000 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(ADD_z_zi, zd, imm8, shift); +} + +void Assembler::dup(const ZRegister& zd, int imm8, int shift) { + // DUP <Zd>.<T>, #<imm>{, <shift>} + // 0010 0101 ..11 1000 11.. .... .... .... 
+ // size<23:22> | opc<18:17> = 00 | sh<13> | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + ResolveSVEImm8Shift(&imm8, &shift); + VIXL_ASSERT((shift < 8) || !zd.IsLaneSizeB()); + + Instr shift_bit = (shift > 0) ? (1 << 13) : 0; + Emit(DUP_z_i | SVESize(zd) | Rd(zd) | shift_bit | ImmField<12, 5>(imm8)); +} + +void Assembler::fdup(const ZRegister& zd, double imm) { + // FDUP <Zd>.<T>, #<const> + // 0010 0101 ..11 1001 110. .... .... .... + // size<23:22> | opc<18:17> = 00 | o2<13> = 0 | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Instr encoded_imm = FP64ToImm8(imm) << 5; + Emit(FDUP_z_i | SVESize(zd) | encoded_imm | Rd(zd)); +} + +void Assembler::mul(const ZRegister& zd, const ZRegister& zn, int imm8) { + // MUL <Zdn>.<T>, <Zdn>.<T>, #<imm> + // 0010 0101 ..11 0000 110. .... .... .... + // size<23:22> | opc<18:16> = 000 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(MUL_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); +} + +void Assembler::smax(const ZRegister& zd, const ZRegister& zn, int imm8) { + // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> + // 0010 0101 ..10 1000 110. .... .... .... + // size<23:22> | opc<18:16> = 000 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(SMAX_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); +} + +void Assembler::smin(const ZRegister& zd, const ZRegister& zn, int imm8) { + // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> + // 0010 0101 ..10 1010 110. .... .... .... + // size<23:22> | opc<18:16> = 010 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(SMIN_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); +} + +void Assembler::sqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0100 11.. .... .... .... + // size<23:22> | opc<18:16> = 100 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SQADD_z_zi, zd, imm8, shift); +} + +void Assembler::sqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0110 11.. .... .... .... + // size<23:22> | opc<18:16> = 110 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SQSUB_z_zi, zd, imm8, shift); +} + +void Assembler::sub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0001 11.. .... .... .... 
+ // size<23:22> | opc<18:16> = 001 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SUB_z_zi, zd, imm8, shift); +} + +void Assembler::subr(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0011 11.. .... .... .... + // size<23:22> | opc<18:16> = 011 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SUBR_z_zi, zd, imm8, shift); +} + +void Assembler::umax(const ZRegister& zd, const ZRegister& zn, int imm8) { + // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> + // 0010 0101 ..10 1001 110. .... .... .... + // size<23:22> | opc<18:16> = 001 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(UMAX_z_zi | SVESize(zd) | Rd(zd) | ImmUnsignedField<12, 5>(imm8)); +} + +void Assembler::umin(const ZRegister& zd, const ZRegister& zn, int imm8) { + // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> + // 0010 0101 ..10 1011 110. .... .... .... + // size<23:22> | opc<18:16> = 011 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(UMIN_z_zi | SVESize(zd) | Rd(zd) | ImmUnsignedField<12, 5>(imm8)); +} + +void Assembler::uqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0101 11.. .... .... .... + // size<23:22> | opc<18:16> = 101 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(UQADD_z_zi, zd, imm8, shift); +} + +void Assembler::uqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0111 11.. .... .... .... + // size<23:22> | opc<18:16> = 111 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(UQSUB_z_zi, zd, imm8, shift); +} + +// SVEMemLoad. 
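// A usage-level sketch of the SVEMemOperand forms the helpers below accept.
// Illustrative only, assuming the usual vixl::aarch64 register aliases and
// the ld1b/ld1d entry points defined later in this file; `assm` is a
// hypothetical Assembler* with a valid buffer.
//
//   // Contiguous, scalar plus vector-length-scaled immediate: [x0, #2, MUL VL].
//   assm->ld1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, 2, SVE_MUL_VL));
//   // Contiguous, scalar plus scalar: [x0, x1]; Rm must not be xzr.
//   assm->ld1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1));
//   // Gather, vector plus immediate: [z1.d, #8]; the offset must be a
//   // multiple of the 8-byte memory element size.
//   assm->ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(z1.VnD(), 8));
//   // Gather, scalar plus scaled 64-bit vector offsets: [x0, z1.d, LSL #3].
//   assm->ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD(), SVE_LSL, 3));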
+ +void Assembler::SVELdSt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_signed, + Instr op) { + VIXL_ASSERT(addr.IsContiguous()); + + Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr); + Instr dtype = + SVEDtype(msize_in_bytes_log2, zt.GetLaneSizeInBytesLog2(), is_signed); + Emit(op | mem_op | dtype | Rt(zt) | PgLow8(pg)); +} + +void Assembler::SVELdSt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr, + Instr op) { + VIXL_ASSERT((num_regs >= 2) && (num_regs <= 4)); + + unsigned msize_in_bytes_log2 = zt1.GetLaneSizeInBytesLog2(); + Instr num = (num_regs - 1) << 21; + Instr msz = msize_in_bytes_log2 << 23; + Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, num_regs, addr); + Emit(op | mem_op | msz | num | Rt(zt1) | PgLow8(pg)); +} + +void Assembler::SVELd1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed) { + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); + if (is_signed) { + // Sign-extension is only possible when the vector elements are larger than + // the elements in memory. + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); + } + + if (addr.IsScatterGather()) { + bool is_load = true; + bool is_ff = false; + SVEScatterGatherHelper(msize_in_bytes_log2, + zt, + pg, + addr, + is_load, + is_signed, + is_ff); + return; + } + + Instr op = 0xffffffff; + if (addr.IsScalarPlusImmediate()) { + op = SVEContiguousLoad_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + op = SVEContiguousLoad_ScalarPlusScalarFixed; + } else { + VIXL_UNIMPLEMENTED(); + } + SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, is_signed, op); +} + +void Assembler::SVELdff1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed) { + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); + if (is_signed) { + // Sign-extension is only possible when the vector elements are larger than + // the elements in memory. + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); + } + + if (addr.IsScatterGather()) { + bool is_load = true; + bool is_ff = true; + SVEScatterGatherHelper(msize_in_bytes_log2, + zt, + pg, + addr, + is_load, + is_signed, + is_ff); + return; + } + + if (addr.IsPlainScalar()) { + // SVEMemOperand(x0) is treated as a scalar-plus-immediate form ([x0, #0]). + // In these instructions, we want to treat it as [x0, xzr]. + SVEMemOperand addr_scalar_plus_scalar(addr.GetScalarBase(), xzr); + // Guard against infinite recursion. 
+ VIXL_ASSERT(!addr_scalar_plus_scalar.IsPlainScalar()); + SVELdff1Helper(msize_in_bytes_log2, + zt, + pg, + addr_scalar_plus_scalar, + is_signed); + return; + } + + Instr op = 0xffffffff; + if (addr.IsScalarPlusScalar()) { + op = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed; + } else { + VIXL_UNIMPLEMENTED(); + } + SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, is_signed, op); +} + +void Assembler::SVEScatterGatherHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_load, + bool is_signed, + bool is_first_fault) { + VIXL_ASSERT(addr.IsScatterGather()); + VIXL_ASSERT(zt.IsLaneSizeS() || zt.IsLaneSizeD()); + VIXL_ASSERT(is_load || !is_first_fault); + VIXL_ASSERT(is_load || !is_signed); + + Instr op = 0xffffffff; + if (addr.IsVectorPlusImmediate()) { + VIXL_ASSERT(AreSameLaneSize(zt, addr.GetVectorBase())); + if (is_load) { + if (zt.IsLaneSizeS()) { + op = SVE32BitGatherLoad_VectorPlusImmFixed; + } else { + op = SVE64BitGatherLoad_VectorPlusImmFixed; + } + } else { + if (zt.IsLaneSizeS()) { + op = SVE32BitScatterStore_VectorPlusImmFixed; + } else { + op = SVE64BitScatterStore_VectorPlusImmFixed; + } + } + } else { + VIXL_ASSERT(addr.IsScalarPlusVector()); + VIXL_ASSERT(AreSameLaneSize(zt, addr.GetVectorOffset())); + SVEOffsetModifier mod = addr.GetOffsetModifier(); + if (zt.IsLaneSizeS()) { + VIXL_ASSERT((mod == SVE_UXTW) || (mod == SVE_SXTW)); + unsigned shift_amount = addr.GetShiftAmount(); + if (shift_amount == 0) { + if (is_load) { + op = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed; + } else { + op = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed; + } + } else if (shift_amount == 1) { + VIXL_ASSERT(msize_in_bytes_log2 == kHRegSizeInBytesLog2); + if (is_load) { + op = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed; + } else { + op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed; + } + } else { + VIXL_ASSERT(shift_amount == 2); + VIXL_ASSERT(msize_in_bytes_log2 == kSRegSizeInBytesLog2); + if (is_load) { + op = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed; + } else { + op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed; + } + } + } else if (zt.IsLaneSizeD()) { + switch (mod) { + case NO_SVE_OFFSET_MODIFIER: + if (is_load) { + op = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed; + } else { + op = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed; + } + break; + case SVE_LSL: + if (is_load) { + op = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed; + } else { + op = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed; + } + break; + case SVE_UXTW: + case SVE_SXTW: { + unsigned shift_amount = addr.GetShiftAmount(); + if (shift_amount == 0) { + if (is_load) { + op = + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed; + } else { + op = + SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed; + } + } else { + VIXL_ASSERT(shift_amount == msize_in_bytes_log2); + if (is_load) { + op = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed; + } else { + op = + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed; + } + } + break; + } + default: + VIXL_UNIMPLEMENTED(); + } + } + } + + Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr, is_load); + Instr msz = ImmUnsignedField<24, 23>(msize_in_bytes_log2); + Instr u = (!is_load || is_signed) ? 0 : (1 << 14); + Instr ff = is_first_fault ? 
(1 << 13) : 0; + Emit(op | mem_op | msz | u | ff | Rt(zt) | PgLow8(pg)); +} + +void Assembler::SVELd234Helper(int num_regs, + const ZRegister& zt1, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + } + + Instr op; + if (addr.IsScalarPlusImmediate()) { + op = SVELoadMultipleStructures_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + op = SVELoadMultipleStructures_ScalarPlusScalarFixed; + } else { + // These instructions don't support any other addressing modes. + VIXL_ABORT(); + } + SVELdSt234Helper(num_regs, zt1, pg, addr, op); +} + +// SVEMemContiguousLoad. + +#define VIXL_DEFINE_LD1(MSZ, LANE_SIZE) \ + void Assembler::ld1##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELd1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, false); \ + } +#define VIXL_DEFINE_LD2(MSZ, LANE_SIZE) \ + void Assembler::ld2##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVELd234Helper(2, zt1, pg, addr); \ + } +#define VIXL_DEFINE_LD3(MSZ, LANE_SIZE) \ + void Assembler::ld3##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVELd234Helper(3, zt1, pg, addr); \ + } +#define VIXL_DEFINE_LD4(MSZ, LANE_SIZE) \ + void Assembler::ld4##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const ZRegister& zt4, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3, zt4); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVELd234Helper(4, zt1, pg, addr); \ + } + +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD1) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD2) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD3) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD4) + +#define VIXL_DEFINE_LD1S(MSZ, LANE_SIZE) \ + void Assembler::ld1s##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELd1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, true); \ + } +VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(VIXL_DEFINE_LD1S) + +// SVEMem32BitGatherAndUnsizedContiguous. + +void Assembler::SVELd1BroadcastHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed) { + VIXL_ASSERT(addr.IsScalarPlusImmediate()); + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); + if (is_signed) { + // Sign-extension is only possible when the vector elements are larger than + // the elements in memory. 
+ VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); + } + + int64_t imm = addr.GetImmediateOffset(); + int divisor = 1 << msize_in_bytes_log2; + VIXL_ASSERT(imm % divisor == 0); + Instr dtype = SVEDtypeSplit(msize_in_bytes_log2, + zt.GetLaneSizeInBytesLog2(), + is_signed); + + Emit(SVELoadAndBroadcastElementFixed | dtype | RnSP(addr.GetScalarBase()) | + ImmUnsignedField<21, 16>(imm / divisor) | Rt(zt) | PgLow8(pg)); +} + +// This prototype maps to 4 instruction encodings: +// LD1RB_z_p_bi_u16 +// LD1RB_z_p_bi_u32 +// LD1RB_z_p_bi_u64 +// LD1RB_z_p_bi_u8 +void Assembler::ld1rb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kBRegSizeInBytesLog2, zt, pg, addr, false); +} + +// This prototype maps to 3 instruction encodings: +// LD1RH_z_p_bi_u16 +// LD1RH_z_p_bi_u32 +// LD1RH_z_p_bi_u64 +void Assembler::ld1rh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kHRegSizeInBytesLog2, zt, pg, addr, false); +} + +// This prototype maps to 2 instruction encodings: +// LD1RW_z_p_bi_u32 +// LD1RW_z_p_bi_u64 +void Assembler::ld1rw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kSRegSizeInBytesLog2, zt, pg, addr, false); +} + +void Assembler::ld1rd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kDRegSizeInBytesLog2, zt, pg, addr, false); +} + +// This prototype maps to 3 instruction encodings: +// LD1RSB_z_p_bi_s16 +// LD1RSB_z_p_bi_s32 +// LD1RSB_z_p_bi_s64 +void Assembler::ld1rsb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kBRegSizeInBytesLog2, zt, pg, addr, true); +} + +// This prototype maps to 2 instruction encodings: +// LD1RSH_z_p_bi_s32 +// LD1RSH_z_p_bi_s64 +void Assembler::ld1rsh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kHRegSizeInBytesLog2, zt, pg, addr, true); +} + +void Assembler::ld1rsw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kWRegSizeInBytesLog2, zt, pg, addr, true); +} + +void Assembler::ldr(const CPURegister& rt, const SVEMemOperand& addr) { + // LDR <Pt/Zt>, [<Xn|SP>{, #<imm>, MUL VL}] + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister()); + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + int64_t imm9 = addr.GetImmediateOffset(); + VIXL_ASSERT(IsInt9(imm9)); + Instr imm9l = ExtractUnsignedBitfield32(2, 0, imm9) << 10; + Instr imm9h = ExtractUnsignedBitfield32(8, 3, imm9) << 16; + + Instr op = LDR_z_bi; + if (rt.IsPRegister()) { + op = LDR_p_bi; + } + Emit(op | Rt(rt) | RnSP(addr.GetScalarBase()) | imm9h | imm9l); +} + +// SVEMem64BitGather. + +// This prototype maps to 3 instruction encodings: +// LDFF1B_z_p_bz_d_64_unscaled +// LDFF1B_z_p_bz_d_x32_unscaled +void Assembler::ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] + // 1100 0100 010. .... 111. .... .... .... 
+ // msz<24:23> = 00 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1B_z_p_bz_d_64_unscaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1B_z_p_ai_d +// LDFF1B_z_p_ai_s +void Assembler::ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0100 001. .... 111. .... .... .... + // msz<24:23> = 00 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1B_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +// This prototype maps to 4 instruction encodings: +// LDFF1D_z_p_bz_d_64_scaled +// LDFF1D_z_p_bz_d_64_unscaled +// LDFF1D_z_p_bz_d_x32_scaled +// LDFF1D_z_p_bz_d_x32_unscaled +void Assembler::ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] + // 1100 0101 111. .... 111. .... .... .... + // msz<24:23> = 11 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1D_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +void Assembler::ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0101 101. .... 111. .... .... .... + // msz<24:23> = 11 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1D_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +// This prototype maps to 6 instruction encodings: +// LDFF1H_z_p_bz_d_64_scaled +// LDFF1H_z_p_bz_d_64_unscaled +// LDFF1H_z_p_bz_d_x32_scaled +// LDFF1H_z_p_bz_d_x32_unscaled +void Assembler::ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] + // 1100 0100 111. .... 111. .... .... .... + // msz<24:23> = 01 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1H_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1H_z_p_ai_d +// LDFF1H_z_p_ai_s +void Assembler::ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0100 101. .... 111. .... .... .... + // msz<24:23> = 01 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1H_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +// This prototype maps to 3 instruction encodings: +// LDFF1SB_z_p_bz_d_64_unscaled +// LDFF1SB_z_p_bz_d_x32_unscaled +void Assembler::ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] + // 1100 0100 010. .... 101. .... .... .... 
+ // msz<24:23> = 00 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SB_z_p_bz_d_64_unscaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1SB_z_p_ai_d +// LDFF1SB_z_p_ai_s +void Assembler::ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0100 001. .... 101. .... .... .... + // msz<24:23> = 00 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SB_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | + ImmField<20, 16>(imm5)); +} + +// This prototype maps to 6 instruction encodings: +// LDFF1SH_z_p_bz_d_64_scaled +// LDFF1SH_z_p_bz_d_64_unscaled +// LDFF1SH_z_p_bz_d_x32_scaled +// LDFF1SH_z_p_bz_d_x32_unscaled +void Assembler::ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] + // 1100 0100 111. .... 101. .... .... .... + // msz<24:23> = 01 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SH_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1SH_z_p_ai_d +// LDFF1SH_z_p_ai_s +void Assembler::ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0100 101. .... 101. .... .... .... + // msz<24:23> = 01 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SH_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | + ImmField<20, 16>(imm5)); +} + +// This prototype maps to 4 instruction encodings: +// LDFF1SW_z_p_bz_d_64_scaled +// LDFF1SW_z_p_bz_d_64_unscaled +// LDFF1SW_z_p_bz_d_x32_scaled +// LDFF1SW_z_p_bz_d_x32_unscaled +void Assembler::ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] + // 1100 0101 011. .... 101. .... .... .... + // msz<24:23> = 10 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SW_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +void Assembler::ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0101 001. .... 101. .... .... .... + // msz<24:23> = 10 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SW_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | + ImmField<20, 16>(imm5)); +} + +// This prototype maps to 6 instruction encodings: +// LDFF1W_z_p_bz_d_64_scaled +// LDFF1W_z_p_bz_d_64_unscaled +// LDFF1W_z_p_bz_d_x32_scaled +// LDFF1W_z_p_bz_d_x32_unscaled +void Assembler::ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] + // 1100 0101 011. .... 111. .... .... .... 
+ // msz<24:23> = 10 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1W_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1W_z_p_ai_d +// LDFF1W_z_p_ai_s +void Assembler::ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0101 001. .... 111. .... .... .... + // msz<24:23> = 10 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1W_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +void Assembler::SVEGatherPrefetchVectorPlusImmediateHelper( + PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsVectorPlusImmediate()); + ZRegister zn = addr.GetVectorBase(); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + + Instr op = 0xffffffff; + switch (prefetch_size) { + case kBRegSize: + op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFB_i_p_ai_s) + : static_cast<Instr>(PRFB_i_p_ai_d); + break; + case kHRegSize: + op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFH_i_p_ai_s) + : static_cast<Instr>(PRFH_i_p_ai_d); + break; + case kSRegSize: + op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFW_i_p_ai_s) + : static_cast<Instr>(PRFW_i_p_ai_d); + break; + case kDRegSize: + op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFD_i_p_ai_s) + : static_cast<Instr>(PRFD_i_p_ai_d); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + int64_t imm5 = addr.GetImmediateOffset(); + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | Rn(zn) | + ImmUnsignedField<20, 16>(imm5)); +} + +void Assembler::SVEGatherPrefetchScalarPlusImmediateHelper( + PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsScalarPlusImmediate()); + int64_t imm6 = addr.GetImmediateOffset(); + + Instr op = 0xffffffff; + switch (prefetch_size) { + case kBRegSize: + op = PRFB_i_p_bi_s; + break; + case kHRegSize: + op = PRFH_i_p_bi_s; + break; + case kSRegSize: + op = PRFW_i_p_bi_s; + break; + case kDRegSize: + op = PRFD_i_p_bi_s; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | + RnSP(addr.GetScalarBase()) | ImmField<21, 16>(imm6)); +} + +void Assembler::SVEContiguousPrefetchScalarPlusScalarHelper( + PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsScalarPlusScalar()); + Instr op = 0xffffffff; + + switch (prefetch_size) { + case kBRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == NO_SVE_OFFSET_MODIFIER); + op = PRFB_i_p_br_s; + break; + case kHRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); + VIXL_ASSERT(addr.GetShiftAmount() == kHRegSizeInBytesLog2); + op = PRFH_i_p_br_s; + break; + case kSRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); + VIXL_ASSERT(addr.GetShiftAmount() == kSRegSizeInBytesLog2); + op = PRFW_i_p_br_s; + break; + case kDRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); + VIXL_ASSERT(addr.GetShiftAmount() == kDRegSizeInBytesLog2); + op = PRFD_i_p_br_s; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | + RnSP(addr.GetScalarBase()) | 
Rm(addr.GetScalarOffset())); +} + +void Assembler::SVEContiguousPrefetchScalarPlusVectorHelper( + PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsScalarPlusVector()); + ZRegister zm = addr.GetVectorOffset(); + SVEOffsetModifier mod = addr.GetOffsetModifier(); + + // All prefetch scalar-plus-vector addressing modes use a shift corresponding + // to the element size. + switch (prefetch_size) { + case kBRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kBRegSizeInBytesLog2); + break; + case kHRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kHRegSizeInBytesLog2); + break; + case kSRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kSRegSizeInBytesLog2); + break; + case kDRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kDRegSizeInBytesLog2); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Instr sx = 0; + Instr op = 0xffffffff; + if ((mod == NO_SVE_OFFSET_MODIFIER) || (mod == SVE_LSL)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + + switch (prefetch_size) { + case kBRegSize: + VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); + op = PRFB_i_p_bz_d_64_scaled; + break; + case kHRegSize: + VIXL_ASSERT(mod == SVE_LSL); + op = PRFH_i_p_bz_d_64_scaled; + break; + case kSRegSize: + VIXL_ASSERT(mod == SVE_LSL); + op = PRFW_i_p_bz_d_64_scaled; + break; + case kDRegSize: + VIXL_ASSERT(mod == SVE_LSL); + op = PRFD_i_p_bz_d_64_scaled; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } else { + VIXL_ASSERT((mod == SVE_SXTW) || (mod == SVE_UXTW)); + VIXL_ASSERT(zm.IsLaneSizeS() || zm.IsLaneSizeD()); + + switch (prefetch_size) { + case kBRegSize: + op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFB_i_p_bz_s_x32_scaled) + : static_cast<Instr>(PRFB_i_p_bz_d_x32_scaled); + break; + case kHRegSize: + op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFH_i_p_bz_s_x32_scaled) + : static_cast<Instr>(PRFH_i_p_bz_d_x32_scaled); + break; + case kSRegSize: + op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFW_i_p_bz_s_x32_scaled) + : static_cast<Instr>(PRFW_i_p_bz_d_x32_scaled); + break; + case kDRegSize: + op = zm.IsLaneSizeS() ? 
static_cast<Instr>(PRFD_i_p_bz_s_x32_scaled) + : static_cast<Instr>(PRFD_i_p_bz_d_x32_scaled); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (mod == SVE_SXTW) { + sx = 1 << 22; + } + } + + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | sx | + RnSP(addr.GetScalarBase()) | Rm(zm)); +} + +void Assembler::SVEPrefetchHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + if (addr.IsVectorPlusImmediate()) { + // For example: + // [z0.s, #0] + SVEGatherPrefetchVectorPlusImmediateHelper(prfop, pg, addr, prefetch_size); + + } else if (addr.IsScalarPlusImmediate()) { + // For example: + // [x0, #42, mul vl] + SVEGatherPrefetchScalarPlusImmediateHelper(prfop, pg, addr, prefetch_size); + + } else if (addr.IsScalarPlusVector()) { + // For example: + // [x0, z0.s, sxtw] + SVEContiguousPrefetchScalarPlusVectorHelper(prfop, pg, addr, prefetch_size); + + } else if (addr.IsScalarPlusScalar()) { + // For example: + // [x0, x1] + SVEContiguousPrefetchScalarPlusScalarHelper(prfop, pg, addr, prefetch_size); + + } else { + VIXL_UNIMPLEMENTED(); + } +} + +void Assembler::prfb(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kBRegSize); +} + +void Assembler::prfd(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kDRegSize); +} + +void Assembler::prfh(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kHRegSize); +} + +void Assembler::prfw(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kSRegSize); +} + +void Assembler::SVELd1St1ScaImmHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + Instr regoffset_op, + Instr immoffset_op, + int imm_divisor) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsScalarPlusScalar() || addr.IsScalarPlusImmediate()); + + Instr op; + if (addr.IsScalarPlusScalar()) { + op = regoffset_op | Rm(addr.GetScalarOffset()); + } else { + int64_t imm = addr.GetImmediateOffset(); + VIXL_ASSERT(((imm % imm_divisor) == 0) && IsInt4(imm / imm_divisor)); + op = immoffset_op | ImmField<19, 16>(imm / imm_divisor); + } + Emit(op | Rt(zt) | PgLow8(pg) | RnSP(addr.GetScalarBase())); +} + +void Assembler::ld1rqb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(0)); + VIXL_ASSERT(zt.IsLaneSizeB()); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LD1RQB_z_p_br_contiguous, + LD1RQB_z_p_bi_u8, + 16); +} + +void Assembler::ld1rqd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(3)); + VIXL_ASSERT(zt.IsLaneSizeD()); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LD1RQD_z_p_br_contiguous, + LD1RQD_z_p_bi_u64, + 16); +} + +void Assembler::ld1rqh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(1)); + VIXL_ASSERT(zt.IsLaneSizeH()); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LD1RQH_z_p_br_contiguous, + LD1RQH_z_p_bi_u16, + 16); +} + +void Assembler::ld1rqw(const ZRegister& zt, + const 
PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(2)); + VIXL_ASSERT(zt.IsLaneSizeS()); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LD1RQW_z_p_br_contiguous, + LD1RQW_z_p_bi_u32, + 16); +} + +#define VIXL_DEFINE_LDFF1(MSZ, LANE_SIZE) \ + void Assembler::ldff1##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELdff1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, false); \ + } +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LDFF1) + +#define VIXL_DEFINE_LDFF1S(MSZ, LANE_SIZE) \ + void Assembler::ldff1s##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELdff1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, true); \ + } +VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(VIXL_DEFINE_LDFF1S) + +void Assembler::ldnf1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(0, + zt, + pg, + addr, + /* is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(3, + zt, + pg, + addr, + /* is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(1, + zt, + pg, + addr, + /* is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(0, + zt, + pg, + addr, + /* is_signed = */ true, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(1, + zt, + pg, + addr, + /* is_signed = */ true, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(2, + zt, + pg, + addr, + /* is_signed = */ true, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(2, + zt, + pg, + addr, 
+ /* is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnt1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1B_z_p_br_contiguous, + LDNT1B_z_p_bi_contiguous); +} + +void Assembler::ldnt1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1D_z_p_br_contiguous, + LDNT1D_z_p_bi_contiguous); +} + +void Assembler::ldnt1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1H_z_p_br_contiguous, + LDNT1H_z_p_bi_contiguous); +} + +void Assembler::ldnt1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1W_z_p_br_contiguous, + LDNT1W_z_p_bi_contiguous); +} + +Instr Assembler::SVEMemOperandHelper(unsigned msize_in_bytes_log2, + int num_regs, + const SVEMemOperand& addr, + bool is_load) { + VIXL_ASSERT((num_regs >= 1) && (num_regs <= 4)); + + Instr op = 0xfffffff; + if (addr.IsScalarPlusImmediate()) { + VIXL_ASSERT((addr.GetImmediateOffset() == 0) || addr.IsMulVl()); + int64_t imm = addr.GetImmediateOffset(); + VIXL_ASSERT((imm % num_regs) == 0); + op = RnSP(addr.GetScalarBase()) | ImmField<19, 16>(imm / num_regs); + + } else if (addr.IsScalarPlusScalar()) { + VIXL_ASSERT(addr.GetScalarOffset().IsZero() || + addr.IsEquivalentToLSL(msize_in_bytes_log2)); + op = RnSP(addr.GetScalarBase()) | Rm(addr.GetScalarOffset()); + + } else if (addr.IsVectorPlusImmediate()) { + ZRegister zn = addr.GetVectorBase(); + uint64_t imm = addr.GetImmediateOffset(); + VIXL_ASSERT(num_regs == 1); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + VIXL_ASSERT(IsMultiple(imm, (1 << msize_in_bytes_log2))); + op = Rn(zn) | ImmUnsignedField<20, 16>(imm >> msize_in_bytes_log2); + + } else if (addr.IsScalarPlusVector()) { + // We have to support several different addressing modes. Some instructions + // support a subset of these, but the SVEMemOperand encoding is consistent. + Register xn = addr.GetScalarBase(); + ZRegister zm = addr.GetVectorOffset(); + SVEOffsetModifier mod = addr.GetOffsetModifier(); + Instr modifier_bit = 1 << (is_load ? 22 : 14); + Instr xs = (mod == SVE_SXTW) ? 
modifier_bit : 0; + VIXL_ASSERT(num_regs == 1); + + if (mod == SVE_LSL) { + // 64-bit scaled offset: [<Xn|SP>, <Zm>.D, LSL #<shift>] + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(addr.GetShiftAmount() == msize_in_bytes_log2); + } else if (mod == NO_SVE_OFFSET_MODIFIER) { + // 64-bit unscaled offset: [<Xn|SP>, <Zm>.D] + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(addr.GetShiftAmount() == 0); + } else { + // 32-bit scaled offset: [<Xn|SP>, <Zm>.S, <mod> #<shift>] + // 32-bit unscaled offset: [<Xn|SP>, <Zm>.S, <mod>] + // 32-bit unpacked scaled offset: [<Xn|SP>, <Zm>.D, <mod> #<shift>] + // 32-bit unpacked unscaled offset: [<Xn|SP>, <Zm>.D, <mod>] + VIXL_ASSERT(zm.IsLaneSizeS() || zm.IsLaneSizeD()); + VIXL_ASSERT((mod == SVE_SXTW) || (mod == SVE_UXTW)); + VIXL_ASSERT((addr.GetShiftAmount() == 0) || + (addr.GetShiftAmount() == msize_in_bytes_log2)); + } + + // The form itself is encoded in the instruction opcode. + op = RnSP(xn) | Rm(zm) | xs; + } else { + VIXL_UNIMPLEMENTED(); + } + + return op; +} + +// SVEMemStore. + +void Assembler::SVESt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + } + + if (addr.IsScatterGather()) { + bool is_load = false; + bool is_signed = false; + bool is_ff = false; + SVEScatterGatherHelper(msize_in_bytes_log2, + zt, + pg, + addr, + is_load, + is_signed, + is_ff); + return; + } + + Instr op; + if (addr.IsScalarPlusImmediate()) { + op = SVEContiguousStore_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + op = SVEContiguousStore_ScalarPlusScalarFixed; + } else { + VIXL_UNIMPLEMENTED(); + op = 0xffffffff; + } + SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, false, op); +} + +void Assembler::SVESt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr) { + if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + } + + Instr op; + if (addr.IsScalarPlusImmediate()) { + op = SVEStoreMultipleStructures_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + op = SVEStoreMultipleStructures_ScalarPlusScalarFixed; + } else { + // These instructions don't support any other addressing modes. 
+ VIXL_ABORT(); + } + SVELdSt234Helper(num_regs, zt1, pg, addr, op); +} + +#define VIXL_DEFINE_ST1(MSZ, LANE_SIZE) \ + void Assembler::st1##MSZ(const ZRegister& zt, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVESt1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr); \ + } +#define VIXL_DEFINE_ST2(MSZ, LANE_SIZE) \ + void Assembler::st2##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVESt234Helper(2, zt1, pg, addr); \ + } +#define VIXL_DEFINE_ST3(MSZ, LANE_SIZE) \ + void Assembler::st3##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVESt234Helper(3, zt1, pg, addr); \ + } +#define VIXL_DEFINE_ST4(MSZ, LANE_SIZE) \ + void Assembler::st4##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const ZRegister& zt4, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3, zt4); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVESt234Helper(4, zt1, pg, addr); \ + } + +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST1) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST2) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST3) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST4) + +void Assembler::stnt1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1B_z_p_br_contiguous, + STNT1B_z_p_bi_contiguous); +} + +void Assembler::stnt1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1D_z_p_br_contiguous, + STNT1D_z_p_bi_contiguous); +} + +void Assembler::stnt1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1H_z_p_br_contiguous, + STNT1H_z_p_bi_contiguous); +} + +void Assembler::stnt1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1W_z_p_br_contiguous, + STNT1W_z_p_bi_contiguous); +} + +void Assembler::str(const CPURegister& rt, const SVEMemOperand& addr) { + // STR <Pt/Zt>, [<Xn|SP>{, #<imm>, MUL VL}] + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister()); + 
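// A worked example of the offset encoding performed below, mirroring ldr()
// above (a sketch; the operand values are assumptions for illustration): for
// str(z0, SVEMemOperand(x0, -3, SVE_MUL_VL)), imm9 = -3 = 0b111111101, so
// imm9l = 0b101 (bits <2:0>, encoded at <12:10>) and imm9h = 0b111111
// (bits <8:3>, encoded at <21:16>).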
VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + int64_t imm9 = addr.GetImmediateOffset(); + VIXL_ASSERT(IsInt9(imm9)); + Instr imm9l = ExtractUnsignedBitfield32(2, 0, imm9) << 10; + Instr imm9h = ExtractUnsignedBitfield32(8, 3, imm9) << 16; + + Instr op = STR_z_bi; + if (rt.IsPRegister()) { + op = STR_p_bi; + } + Emit(op | Rt(rt) | RnSP(addr.GetScalarBase()) | imm9h | imm9l); +} + +// SVEMulIndex. + +void Assembler::sdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + + Instr op = 0xffffffff; + switch (zda.GetLaneSizeInBits()) { + case kSRegSize: + VIXL_ASSERT(IsUint2(index)); + op = SDOT_z_zzzi_s | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn); + break; + case kDRegSize: + VIXL_ASSERT(IsUint1(index)); + op = SDOT_z_zzzi_d | Rx<19, 16>(zm) | (index << 20) | Rd(zda) | Rn(zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Emit(op); +} + +void Assembler::udot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + + Instr op = 0xffffffff; + switch (zda.GetLaneSizeInBits()) { + case kSRegSize: + VIXL_ASSERT(IsUint2(index)); + op = UDOT_z_zzzi_s | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn); + break; + case kDRegSize: + VIXL_ASSERT(IsUint1(index)); + op = UDOT_z_zzzi_d | Rx<19, 16>(zm) | (index << 20) | Rd(zda) | Rn(zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Emit(op); +} + +// SVEPartitionBreak. + +void Assembler::brka(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Instr m = pg.IsMerging() ? 0x00000010 : 0x00000000; + Emit(BRKA_p_p_p | Pd(pd) | Pg<13, 10>(pg) | m | Pn(pn)); +} + +void Assembler::brkas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Emit(BRKAS_p_p_p_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +void Assembler::brkb(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Instr m = pg.IsMerging() ? 
0x00000010 : 0x00000000; + Emit(BRKB_p_p_p | Pd(pd) | Pg<13, 10>(pg) | m | Pn(pn)); +} + +void Assembler::brkbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Emit(BRKBS_p_p_p_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +void Assembler::brkn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + USE(pm); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + VIXL_ASSERT(pd.Is(pm)); + + Emit(BRKN_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +void Assembler::brkns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + USE(pm); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + VIXL_ASSERT(pd.Is(pm)); + + Emit(BRKNS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +// SVEPermutePredicate. + +void Assembler::punpkhi(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) { + // PUNPKHI <Pd>.H, <Pn>.B + // 0000 0101 0011 0001 0100 000. ...0 .... + // H<16> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeH()); + VIXL_ASSERT(pn.IsLaneSizeB()); + + Emit(PUNPKHI_p_p | Pd(pd) | Pn(pn)); +} + +void Assembler::punpklo(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) { + // PUNPKLO <Pd>.H, <Pn>.B + // 0000 0101 0011 0000 0100 000. ...0 .... + // H<16> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeH()); + VIXL_ASSERT(pn.IsLaneSizeB()); + + Emit(PUNPKLO_p_p | Pd(pd) | Pn(pn)); +} + +void Assembler::rev(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) { + // REV <Pd>.<T>, <Pn>.<T> + // 0000 0101 ..11 0100 0100 000. ...0 .... + // size<23:22> | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn)); + + Emit(REV_p_p | SVESize(pd) | Pd(pd) | Rx<8, 5>(pn)); +} + +void Assembler::trn1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0101 000. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 10 | H<10> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(TRN1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::trn2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // TRN2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0101 010. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 10 | H<10> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(TRN2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::uzp1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // UZP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0100 100. ...0 .... 
+ // size<23:22> | Pm<19:16> | opc<12:11> = 01 | H<10> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(UZP1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::uzp2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // UZP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0100 110. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 01 | H<10> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(UZP2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::zip1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // ZIP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0100 000. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 00 | H<10> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(ZIP1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::zip2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // ZIP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0100 010. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 00 | H<10> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(ZIP2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +// SVEPermuteVectorExtract. + +void Assembler::ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset) { + // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm> + // 0000 0101 001. .... 000. .... .... .... + // imm8h<20:16> | imm8l<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(IsUint8(offset)); + + int imm8h = ExtractUnsignedBitfield32(7, 3, offset); + int imm8l = ExtractUnsignedBitfield32(2, 0, offset); + Emit(EXT_z_zi_des | Rd(zd) | Rn(zm) | ImmUnsignedField<20, 16>(imm8h) | + ImmUnsignedField<12, 10>(imm8l)); +} + +// SVEPermuteVectorInterleaving. + +void Assembler::trn1(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // TRN1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0111 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 100 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(TRN1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::trn2(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // TRN2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0111 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 101 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(TRN2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uzp1(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UZP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0110 10.. .... .... 
+ // size<23:22> | Zm<20:16> | opc<12:10> = 010 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UZP1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uzp2(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UZP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0110 11.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 011 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UZP2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::zip1(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ZIP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0110 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ZIP1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::zip2(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ZIP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0110 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ZIP2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEPermuteVectorPredicated. + +void Assembler::clasta(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> + // 0000 0101 ..11 0000 101. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Rdn<4:0> + + USE(rn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rd.Is(rn)); + + Emit(CLASTA_r_p_z | SVESize(zm) | Rd(rd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clasta(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> + // 0000 0101 ..10 1010 100. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Vdn<4:0> + + USE(vn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.Is(vn)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(vd, zm)); + + Emit(CLASTA_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> + // 0000 0101 ..10 1000 100. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(CLASTA_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clastb(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> + // 0000 0101 ..11 0001 101. .... .... .... + // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Rdn<4:0> + + USE(rn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rd.Is(rn)); + + Emit(CLASTB_r_p_z | SVESize(zm) | Rd(rd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clastb(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> + // 0000 0101 ..10 1011 100. .... .... .... 
+ // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Vdn<4:0> + + USE(vn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.Is(vn)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(vd, zm)); + + Emit(CLASTB_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> + // 0000 0101 ..10 1001 100. .... .... .... + // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(CLASTB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::compact(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn) { + // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> + // 0000 0101 1.10 0001 100. .... .... .... + // sz<22> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT((zd.GetLaneSizeInBits() == kSRegSize) || + (zd.GetLaneSizeInBits() == kDRegSize)); + + Instr sz = (zd.GetLaneSizeInBits() == kDRegSize) ? (1 << 22) : 0; + Emit(COMPACT_z_p_z | sz | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cpy(const ZRegister& zd, + const PRegisterM& pg, + const Register& rn) { + // CPY <Zd>.<T>, <Pg>/M, <R><n|SP> + // 0000 0101 ..10 1000 101. .... .... .... + // size<23:22> | Pg<12:10> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(CPY_z_p_r | SVESize(zd) | Rd(zd) | PgLow8(pg) | RnSP(rn)); +} + +void Assembler::cpy(const ZRegister& zd, + const PRegisterM& pg, + const VRegister& vn) { + // CPY <Zd>.<T>, <Pg>/M, <V><n> + // 0000 0101 ..10 0000 100. .... .... .... + // size<23:22> | Pg<12:10> | Vn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vn.IsScalar()); + VIXL_ASSERT(static_cast<unsigned>(vn.GetSizeInBits()) == + zd.GetLaneSizeInBits()); + + Emit(CPY_z_p_v | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(vn)); +} + +void Assembler::lasta(const Register& rd, + const PRegister& pg, + const ZRegister& zn) { + // LASTA <R><d>, <Pg>, <Zn>.<T> + // 0000 0101 ..10 0000 101. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zn<9:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LASTA_r_p_z | SVESize(zn) | Rd(rd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::lasta(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // LASTA <V><d>, <Pg>, <Zn>.<T> + // 0000 0101 ..10 0010 100. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(LASTA_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::lastb(const Register& rd, + const PRegister& pg, + const ZRegister& zn) { + // LASTB <R><d>, <Pg>, <Zn>.<T> + // 0000 0101 ..10 0001 101. .... .... .... + // size<23:22> | B<16> = 1 | Pg<12:10> | Zn<9:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LASTB_r_p_z | SVESize(zn) | Rd(rd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::lastb(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // LASTB <V><d>, <Pg>, <Zn>.<T> + // 0000 0101 ..10 0011 100. .... .... .... 
+ // size<23:22> | B<16> = 1 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(LASTB_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::rbit(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0101 ..10 0111 100. .... .... .... + // size<23:22> | opc<17:16> = 11 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(RBIT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::revb(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0101 ..10 0100 100. .... .... .... + // size<23:22> | opc<17:16> = 00 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.IsLaneSizeH() || zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(REVB_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::revh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0101 ..10 0101 100. .... .... .... + // size<23:22> | opc<17:16> = 01 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(REVH_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::revw(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // REVW <Zd>.D, <Pg>/M, <Zn>.D + // 0000 0101 ..10 0110 100. .... .... .... + // size<23:22> | opc<17:16> = 10 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(REVW_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::splice(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SPLICE <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> + // 0000 0101 ..10 1100 100. .... .... .... + // size<23:22> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SPLICE_z_p_zz_des | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +// SVEPermuteVectorUnpredicated. + +void Assembler::dup(const ZRegister& zd, const Register& xn) { + // DUP <Zd>.<T>, <R><n|SP> + // 0000 0101 ..10 0000 0011 10.. .... .... + // size<23:22> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(DUP_z_r | SVESize(zd) | Rd(zd) | RnSP(xn)); +} + +void Assembler::dup(const ZRegister& zd, const ZRegister& zn, unsigned index) { + // DUP <Zd>.<T>, <Zn>.<T>[<imm>] + // 0000 0101 ..1. .... 0010 00.. .... .... + // imm2<23:22> | tsz<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(zd, zn)); + VIXL_ASSERT((index * zd.GetLaneSizeInBits()) < 512); + int n = zd.GetLaneSizeInBytesLog2(); + unsigned imm_7 = (index << (n + 1)) | (1 << n); + VIXL_ASSERT(IsUint7(imm_7)); + unsigned imm_2 = ExtractUnsignedBitfield32(6, 5, imm_7); + unsigned tsz_5 = ExtractUnsignedBitfield32(4, 0, imm_7); + + Emit(DUP_z_zi | ImmUnsignedField<23, 22>(imm_2) | + ImmUnsignedField<20, 16>(tsz_5) | Rd(zd) | Rn(zn)); +} + +void Assembler::insr(const ZRegister& zdn, const Register& rm) { + // INSR <Zdn>.<T>, <R><m> + // 0000 0101 ..10 0100 0011 10.. .... 
.... + // size<23:22> | Rm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(INSR_z_r | SVESize(zdn) | Rd(zdn) | Rn(rm)); +} + +void Assembler::insr(const ZRegister& zdn, const VRegister& vm) { + // INSR <Zdn>.<T>, <V><m> + // 0000 0101 ..11 0100 0011 10.. .... .... + // size<23:22> | Vm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vm.IsScalar()); + + Emit(INSR_z_v | SVESize(zdn) | Rd(zdn) | Rn(vm)); +} + +void Assembler::rev(const ZRegister& zd, const ZRegister& zn) { + // REV <Zd>.<T>, <Zn>.<T> + // 0000 0101 ..11 1000 0011 10.. .... .... + // size<23:22> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(zd, zn)); + + Emit(REV_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::sunpkhi(const ZRegister& zd, const ZRegister& zn) { + // SUNPKHI <Zd>.<T>, <Zn>.<Tb> + // 0000 0101 ..11 0001 0011 10.. .... .... + // size<23:22> | U<17> = 0 | H<16> = 1 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(SUNPKHI_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::sunpklo(const ZRegister& zd, const ZRegister& zn) { + // SUNPKLO <Zd>.<T>, <Zn>.<Tb> + // 0000 0101 ..11 0000 0011 10.. .... .... + // size<23:22> | U<17> = 0 | H<16> = 0 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(SUNPKLO_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::tbl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // TBL <Zd>.<T>, { <Zn>.<T> }, <Zm>.<T> + // 0000 0101 ..1. .... 0011 00.. .... .... + // size<23:22> | Zm<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(TBL_z_zz_1 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uunpkhi(const ZRegister& zd, const ZRegister& zn) { + // UUNPKHI <Zd>.<T>, <Zn>.<Tb> + // 0000 0101 ..11 0011 0011 10.. .... .... + // size<23:22> | U<17> = 1 | H<16> = 1 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(UUNPKHI_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::uunpklo(const ZRegister& zd, const ZRegister& zn) { + // UUNPKLO <Zd>.<T>, <Zn>.<Tb> + // 0000 0101 ..11 0010 0011 10.. .... .... + // size<23:22> | U<17> = 1 | H<16> = 0 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(UUNPKLO_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +// SVEPredicateCount. + +void Assembler::cntp(const Register& xd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + // CNTP <Xd>, <Pg>, <Pn>.<T> + // 0010 0101 ..10 0000 10.. ..0. .... .... + // size<23:22> | opc<18:16> = 000 | Pg<13:10> | o2<9> = 0 | Pn<8:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + VIXL_ASSERT(pg.IsUnqualified()); + if (pg.HasLaneSize()) VIXL_ASSERT(AreSameFormat(pg, pn)); + + Emit(CNTP_r_p_p | SVESize(pn) | Rd(xd) | Pg<13, 10>(pg) | Pn(pn)); +} + +// SVEPredicateLogicalOp. 
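The predicate logical group that follows shares one shape: a destination predicate, a zeroing governing predicate, and two byte-lane source predicates. As a minimal usage sketch of these encodings via the public VIXL AArch64 API (the GeneratePredicateAnd helper, its include path, and the assumption of an already-configured Assembler are illustrative, not part of this patch):

#include "aarch64/macro-assembler-aarch64.h"

using namespace vixl::aarch64;

// Sketch: combine two predicates under an all-true governing predicate,
// using the ptrue and and_ entry points defined in this patch.
void GeneratePredicateAnd(Assembler* masm) {
  masm->ptrue(p3.VnB(), SVE_ALL);  // Every byte lane of p3 is active.
  // For lanes where p3 is active, p2 = p0 & p1; inactive lanes are zeroed.
  masm->and_(p2.VnB(), p3.Zeroing(), p0.VnB(), p1.VnB());
}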
+void Assembler::and_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(AND_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::ands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ANDS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::bic(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(BIC_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::bics(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(BICS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::eor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(EOR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::eors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(EORS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nand(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NAND_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NANDS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NOR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NORS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + 
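// ORN of predicates: for lanes where the governing predicate pg is + // active, pd = pn | ~pm; inactive lanes of pd are zeroed. +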
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORN_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORNS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orr(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orrs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORRS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::sel(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Emit(SEL_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +// SVEPredicateMisc. + +void Assembler::pfalse(const PRegisterWithLaneSize& pd) { + // PFALSE <Pd>.B + // 0010 0101 0001 1000 1110 0100 0000 .... + // op<23> = 0 | S<22> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + // Ignore the lane size, since it makes no difference to the operation. + + Emit(PFALSE_p | Pd(pd)); +} + +void Assembler::pfirst(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + // PFIRST <Pdn>.B, <Pg>, <Pdn>.B + // 0010 0101 0101 1000 1100 000. ...0 .... + // op<23> = 0 | S<22> = 1 | Pg<8:5> | Pdn<3:0> + + USE(pn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.Is(pn)); + VIXL_ASSERT(pd.IsLaneSizeB()); + + Emit(PFIRST_p_p_p | Pd(pd) | Pg<8, 5>(pg)); +} + +void Assembler::pnext(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + // PNEXT <Pdn>.<T>, <Pg>, <Pdn>.<T> + // 0010 0101 ..01 1001 1100 010. ...0 .... + // size<23:22> | Pg<8:5> | Pdn<3:0> + + USE(pn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.Is(pn)); + + Emit(PNEXT_p_p_p | SVESize(pd) | Pd(pd) | Pg<8, 5>(pg)); +} + +void Assembler::ptest(const PRegister& pg, const PRegisterWithLaneSize& pn) { + // PTEST <Pg>, <Pn>.B + // 0010 0101 0101 0000 11.. ..0. ...0 0000 + // op<23> = 0 | S<22> = 1 | Pg<13:10> | Pn<8:5> | opc2<3:0> = 0000 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pn.IsLaneSizeB()); + + Emit(PTEST_p_p | Pg<13, 10>(pg) | Rx<8, 5>(pn)); +} + +void Assembler::ptrue(const PRegisterWithLaneSize& pd, int pattern) { + // PTRUE <Pd>.<T>{, <pattern>} + // 0010 0101 ..01 1000 1110 00.. ...0 .... + // size<23:22> | S<16> = 0 | pattern<9:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(PTRUE_p_s | SVESize(pd) | Pd(pd) | ImmSVEPredicateConstraint(pattern)); +} + +void Assembler::ptrues(const PRegisterWithLaneSize& pd, int pattern) { + // PTRUES <Pd>.<T>{, <pattern>} + // 0010 0101 ..01 1001 1110 00.. ...0 .... 
+ // size<23:22> | S<16> = 1 | pattern<9:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(PTRUES_p_s | SVESize(pd) | Pd(pd) | ImmSVEPredicateConstraint(pattern)); +} + +void Assembler::rdffr(const PRegisterWithLaneSize& pd) { + // RDFFR <Pd>.B + // 0010 0101 0001 1001 1111 0000 0000 .... + // op<23> = 0 | S<22> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(RDFFR_p_f | Pd(pd)); +} + +void Assembler::rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { + // RDFFR <Pd>.B, <Pg>/Z + // 0010 0101 0001 1000 1111 000. ...0 .... + // op<23> = 0 | S<22> = 0 | Pg<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(RDFFR_p_p_f | Pd(pd) | Pg<8, 5>(pg)); +} + +void Assembler::rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { + // RDFFRS <Pd>.B, <Pg>/Z + // 0010 0101 0101 1000 1111 000. ...0 .... + // op<23> = 0 | S<22> = 1 | Pg<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(RDFFRS_p_p_f | Pd(pd) | Pg<8, 5>(pg)); +} + +// SVEPropagateBreak. + +void Assembler::brkpa(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPA <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B + // 0010 0101 0000 .... 11.. ..0. ...0 .... + // op<23> = 0 | S<22> = 0 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPA_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::brkpas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPAS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B + // 0010 0101 0100 .... 11.. ..0. ...0 .... + // op<23> = 0 | S<22> = 1 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPAS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::brkpb(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPB <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B + // 0010 0101 0000 .... 11.. ..0. ...1 .... + // op<23> = 0 | S<22> = 0 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPB_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::brkpbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPBS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B + // 0010 0101 0100 .... 11.. ..0. ...1 .... + // op<23> = 0 | S<22> = 1 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPBS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +// SVEStackFrameAdjustment. + +void Assembler::addpl(const Register& xd, const Register& xn, int imm6) { + // ADDPL <Xd|SP>, <Xn|SP>, #<imm> + // 0000 0100 011. .... 0101 0... .... .... + // op<22> = 1 | Rn<20:16> | imm6<10:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + VIXL_ASSERT(xn.IsX()); + + Emit(ADDPL_r_ri | RdSP(xd) | RmSP(xn) | ImmField<10, 5>(imm6)); +} + +void Assembler::addvl(const Register& xd, const Register& xn, int imm6) { + // ADDVL <Xd|SP>, <Xn|SP>, #<imm> + // 0000 0100 001. .... 0101 0... .... .... 
+ // op<22> = 0 | Rn<20:16> | imm6<10:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + VIXL_ASSERT(xn.IsX()); + + Emit(ADDVL_r_ri | RdSP(xd) | RmSP(xn) | ImmField<10, 5>(imm6)); +} + +// SVEStackFrameSize. + +void Assembler::rdvl(const Register& xd, int imm6) { + // RDVL <Xd>, #<imm> + // 0000 0100 1011 1111 0101 0... .... .... + // op<22> = 0 | opc2<20:16> = 11111 | imm6<10:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + + Emit(RDVL_r_i | Rd(xd) | ImmField<10, 5>(imm6)); +} + +// SVEVectorSelect. + +void Assembler::sel(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SEL_z_p_zz | SVESize(zd) | Rd(zd) | Pg<13, 10>(pg) | Rn(zn) | Rm(zm)); +} + +// SVEWriteFFR. + +void Assembler::setffr() { + // SETFFR + // 0010 0101 0010 1100 1001 0000 0000 0000 + // opc<23:22> = 00 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(SETFFR_f); +} + +void Assembler::wrffr(const PRegisterWithLaneSize& pn) { + // WRFFR <Pn>.B + // 0010 0101 0010 1000 1001 000. ...0 0000 + // opc<23:22> = 00 | Pn<8:5> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(WRFFR_f_p | Rx<8, 5>(pn)); +} + +// Aliases. + +void Assembler::bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + and_(zd, zn, ~imm); +} + +void Assembler::eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + eor(zd, zn, ~imm); +} + +void Assembler::orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + orr(zd, zn, ~imm); +} + + +void Assembler::fmov(const ZRegister& zd, const PRegisterM& pg, double imm) { + if (IsPositiveZero(imm)) { + cpy(zd, pg, 0); + } else { + fcpy(zd, pg, imm); + } +} + +void Assembler::fmov(const ZRegister& zd, double imm) { + if (IsPositiveZero(imm)) { + dup(zd, imm); + } else { + fdup(zd, imm); + } +} + +void Assembler::mov(const PRegister& pd, const PRegister& pn) { + // If the inputs carry a lane size, they must match. 
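+ // Unqualified predicates (with no lane size) are accepted too: the copy is + // lane-size agnostic, so it is emitted as a byte-sized ORR of pn with + // itself, governed by pn zeroing, which reproduces pn exactly.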
+ VIXL_ASSERT((!pd.HasLaneSize() && !pn.HasLaneSize()) || + AreSameLaneSize(pd, pn)); + orr(pd.VnB(), pn.Zeroing(), pn.VnB(), pn.VnB()); +} + +void Assembler::mov(const PRegisterWithLaneSize& pd, + const PRegisterM& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + sel(pd, pg, pn, pd); +} + +void Assembler::mov(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + and_(pd, pg, pn, pn); +} + +void Assembler::mov(const ZRegister& zd, + const PRegister& pg, + int imm8, + int shift) { + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + cpy(zd, pg, imm8, shift); +} + +void Assembler::mov(const ZRegister& zd, const Register& xn) { dup(zd, xn); } + +void Assembler::mov(const ZRegister& zd, const VRegister& vn) { + VIXL_ASSERT(vn.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(zd, vn)); + dup(zd, vn.Z().WithSameLaneSizeAs(vn), 0); +} + +void Assembler::mov(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + orr(zd.VnD(), zn.VnD(), zn.VnD()); +} + +void Assembler::mov(const ZRegister& zd, const ZRegister& zn, unsigned index) { + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + dup(zd, zn, index); +} + +void Assembler::mov(const ZRegister& zd, + const PRegisterM& pg, + const Register& rn) { + cpy(zd, pg, rn); +} + +void Assembler::mov(const ZRegister& zd, + const PRegisterM& pg, + const VRegister& vn) { + VIXL_ASSERT(vn.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(zd, vn)); + cpy(zd, pg, vn); +} + +void Assembler::mov(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + sel(zd, pg, zn, zd); +} + +void Assembler::mov(const ZRegister& zd, uint64_t imm) { + // Mov is an alias of dupm for certain values of imm. Whilst this matters in + // the disassembler, for the assembler, we don't distinguish between the + // two mnemonics, and simply call dupm. + dupm(zd, imm); +} + +void Assembler::mov(const ZRegister& zd, int imm8, int shift) { + dup(zd, imm8, shift); +} + +void Assembler::movs(const PRegister& pd, const PRegister& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + orrs(pd.VnB(), pn.Zeroing(), pn.VnB(), pn.VnB()); +} + +void Assembler::movs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + ands(pd, pg, pn, pn); +} + +void Assembler::not_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + eor(pd, pg, pn, pg.VnB()); +} + +void Assembler::nots(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + eors(pd, pg, pn, pg.VnB()); +} + +} // namespace aarch64 +} // namespace vixl diff --git a/src/aarch64/constants-aarch64.h b/src/aarch64/constants-aarch64.h index 36f5568d..bf93918d 100644 --- a/src/aarch64/constants-aarch64.h +++ b/src/aarch64/constants-aarch64.h @@ -34,6 +34,8 @@ namespace aarch64 { const unsigned kNumberOfRegisters = 32; const unsigned kNumberOfVRegisters = 32; +const unsigned kNumberOfZRegisters = kNumberOfVRegisters; +const unsigned kNumberOfPRegisters = 16; // Callee saved registers are x21-x30(lr). const int kNumberOfCalleeSavedRegisters = 10; const int kFirstCalleeSavedRegisterIndex = 21; @@ -41,14 +43,34 @@ const int kFirstCalleeSavedRegisterIndex = 21; // still caller-saved. 
const int kNumberOfCalleeSavedFPRegisters = 8; const int kFirstCalleeSavedFPRegisterIndex = 8; +// All predicated instructions accept at least p0-p7 as the governing predicate. +const unsigned kNumberOfGoverningPRegisters = 8; // clang-format off +#define AARCH64_P_REGISTER_CODE_LIST(R) \ + R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \ + R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) + #define AARCH64_REGISTER_CODE_LIST(R) \ R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \ R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) \ R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \ R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31) +// SVE loads and stores use "w" instead of "s" for word-sized accesses, so the +// mapping from the load/store variant to constants like k*RegSize is irregular. +#define VIXL_SVE_LOAD_STORE_VARIANT_LIST(V) \ + V(b, B) \ + V(h, H) \ + V(w, S) \ + V(d, D) + +// Sign-extending loads don't have double-word variants. +#define VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(V) \ + V(b, B) \ + V(h, H) \ + V(w, S) + #define INSTRUCTION_FIELDS_LIST(V_) \ /* Register fields */ \ V_(Rd, 4, 0, ExtractBits) /* Destination register. */ \ @@ -59,6 +81,11 @@ V_(Ra, 14, 10, ExtractBits) /* Third source register. */ \ V_(Rt, 4, 0, ExtractBits) /* Load/store register. */ \ V_(Rt2, 14, 10, ExtractBits) /* Load/store second register. */ \ V_(Rs, 20, 16, ExtractBits) /* Exclusive access status. */ \ +V_(Pt, 3, 0, ExtractBits) /* Load/store register (p0-p7). */ \ +V_(Pd, 3, 0, ExtractBits) /* SVE destination predicate register. */ \ +V_(Pn, 8, 5, ExtractBits) /* SVE first source predicate register. */ \ +V_(Pm, 19, 16, ExtractBits) /* SVE second source predicate register.*/ \ +V_(PgLow8, 12, 10, ExtractBits) /* Governing predicate (p0-p7). */ \ \ /* Common bits */ \ V_(SixtyFourBits, 31, 31, ExtractBits) \ @@ -74,7 +101,7 @@ V_(ImmDPShift, 15, 10, ExtractBits) \ \ /* Add/subtract immediate */ \ V_(ImmAddSub, 21, 10, ExtractBits) \ -V_(ShiftAddSub, 23, 22, ExtractBits) \ +V_(ImmAddSubShift, 22, 22, ExtractBits) \ \ /* Add/substract extend */ \ V_(ImmExtendShift, 12, 10, ExtractBits) \ @@ -177,7 +204,23 @@ V_(NEONCmode, 15, 12, ExtractBits) \ /* NEON Shift Immediate fields */ \ V_(ImmNEONImmhImmb, 22, 16, ExtractBits) \ V_(ImmNEONImmh, 22, 19, ExtractBits) \ -V_(ImmNEONImmb, 18, 16, ExtractBits) +V_(ImmNEONImmb, 18, 16, ExtractBits) \ + \ +/* SVE generic fields */ \ +V_(SVESize, 23, 22, ExtractBits) \ +V_(ImmSVEVLScale, 10, 5, ExtractSignedBits) \ +V_(ImmSVEIntWideSigned, 12, 5, ExtractSignedBits) \ +V_(ImmSVEIntWideUnsigned, 12, 5, ExtractBits) \ +V_(ImmSVEPredicateConstraint, 9, 5, ExtractBits) \ + \ +/* SVE Bitwise Immediate bitfield */ \ +V_(SVEBitN, 17, 17, ExtractBits) \ +V_(SVEImmRotate, 16, 11, ExtractBits) \ +V_(SVEImmSetBits, 10, 5, ExtractBits) \ + \ +V_(SVEImmPrefetchOperation, 3, 0, ExtractBits) \ +V_(SVEPrefetchHint, 3, 3, ExtractBits) + // clang-format on #define SYSTEM_REGISTER_FIELDS_LIST(V_, M_) \ @@ -235,7 +278,22 @@ enum Condition { // Aliases. hs = cs, // C set Unsigned higher or same. - lo = cc // C clear Unsigned lower. + lo = cc, // C clear Unsigned lower. + + // Floating-point additional condition code. + uo, // Unordered comparison. + + // SVE predicate condition aliases. + sve_none = eq, // No active elements were true. + sve_any = ne, // An active element was true. + sve_nlast = cs, // The last element was not true. + sve_last = cc, // The last element was true. + sve_first = mi, // The first element was true. + sve_nfrst = pl, // The first element was not true. 
+ sve_pmore = hi, // An active element was true but not the last element. + sve_plast = ls, // The last active element was true or no active elements were true. + sve_tcont = ge, // CTERM termination condition not detected. + sve_tstop = lt // CTERM termination condition detected. }; inline Condition InvertCondition(Condition cond) { @@ -279,7 +337,12 @@ enum StatusFlags { FPEqualFlag = ZCFlag, FPLessThanFlag = NFlag, FPGreaterThanFlag = CFlag, - FPUnorderedFlag = CVFlag + FPUnorderedFlag = CVFlag, + + // SVE condition flags. + SVEFirstFlag = NFlag, + SVENoneFlag = ZFlag, + SVENotLastFlag = CFlag }; enum Shift { @@ -303,6 +366,17 @@ enum Extend { SXTX = 7 }; +enum SVEOffsetModifier { + NO_SVE_OFFSET_MODIFIER, + // Multiply (each element of) the offset by either the vector or predicate + // length, according to the context. + SVE_MUL_VL, + // Shift or extend modifiers (as in `Shift` or `Extend`). + SVE_LSL, + SVE_UXTW, + SVE_SXTW +}; + enum SystemHint { NOP = 0, YIELD = 1, @@ -368,6 +442,12 @@ enum PrefetchOperation { PSTL3STRM = 0x15 }; +constexpr bool IsNamedPrefetchOperation(int op) { + return ((op >= PLDL1KEEP) && (op <= PLDL3STRM)) || + ((op >= PLIL1KEEP) && (op <= PLIL3STRM)) || + ((op >= PSTL1KEEP) && (op <= PSTL3STRM)); +} + enum BType { // Set when executing any instruction on a guarded page, except those cases // listed below. @@ -429,6 +509,36 @@ enum DataCacheOp { ZVA = CacheOpEncoder<3, 7, 4, 1>::value }; + +// Some SVE instructions support a predicate constraint pattern. This is +// interpreted as a VL-dependent value, and is typically used to initialise +// predicates, or to otherwise limit the number of processed elements. +enum SVEPredicateConstraint { + // Select 2^N elements, for the largest possible N. + SVE_POW2 = 0x0, + // Each VL<N> selects exactly N elements if possible, or zero if N is greater + // than the number of elements. Note that the encoding values for VL<N> are + // not linearly related to N. + SVE_VL1 = 0x1, + SVE_VL2 = 0x2, + SVE_VL3 = 0x3, + SVE_VL4 = 0x4, + SVE_VL5 = 0x5, + SVE_VL6 = 0x6, + SVE_VL7 = 0x7, + SVE_VL8 = 0x8, + SVE_VL16 = 0x9, + SVE_VL32 = 0xa, + SVE_VL64 = 0xb, + SVE_VL128 = 0xc, + SVE_VL256 = 0xd, + // Each MUL<N> selects the largest multiple of N elements that the vector + // length supports. Note that for D-sized lanes, this can be zero. + SVE_MUL4 = 0x1d, + SVE_MUL3 = 0x1e, + // Select all elements. + SVE_ALL = 0x1f +}; + // Instruction enumerations. // // These are the masks that define a class of instructions, and the list of @@ -503,6 +613,14 @@ enum NEONScalarFormatField { NEON_D = 0x00C00000 }; +enum SVESizeField { + SVESizeFieldMask = 0x00C00000, + SVE_B = 0x00000000, + SVE_H = 0x00400000, + SVE_S = 0x00800000, + SVE_D = 0x00C00000 +}; + // PC relative addressing. 
enum PCRelAddressingOp { PCRelAddressingFixed = 0x10000000, @@ -531,8 +649,8 @@ enum AddSubOp { enum AddSubImmediateOp { AddSubImmediateFixed = 0x11000000, - AddSubImmediateFMask = 0x1F000000, - AddSubImmediateMask = 0xFF000000, + AddSubImmediateFMask = 0x1F800000, + AddSubImmediateMask = 0xFF800000, #define ADD_SUB_IMMEDIATE(A) \ A##_w_imm = AddSubImmediateFixed | A, \ A##_x_imm = AddSubImmediateFixed | A | SixtyFourBits @@ -2660,11 +2778,1626 @@ enum NEONScalarShiftImmediateOp { NEON_FCVTZU_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_imm }; +enum SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsOp { + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed = 0x84A00000, + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08000, + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E000, + LD1SH_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed, + LDFF1SH_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00002000, + LD1H_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00004000, + LDFF1H_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00006000 +}; + +enum SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsOp { + SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed = 0x85200000, + SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08000, + SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E000, + LD1W_z_p_bz_s_x32_scaled = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed | 0x00004000, + LDFF1W_z_p_bz_s_x32_scaled = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed | 0x00006000 +}; + +enum SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsOp { + SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed = 0x84000000, + SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFMask = 0xFE208000, + SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask = 0xFFA0E000, + LD1SB_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed, + LDFF1SB_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00002000, + LD1B_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00004000, + LDFF1B_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00006000, + LD1SH_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00800000, + LDFF1SH_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00802000, + LD1H_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00804000, + LDFF1H_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00806000, + LD1W_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x01004000, + LDFF1W_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x01006000 +}; + +enum SVE32BitGatherLoad_VectorPlusImmOp { + SVE32BitGatherLoad_VectorPlusImmFixed = 0x84208000, + SVE32BitGatherLoad_VectorPlusImmFMask = 0xFE608000, + SVE32BitGatherLoad_VectorPlusImmMask = 0xFFE0E000, + LD1SB_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed, + LDFF1SB_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00002000, + LD1B_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00004000, + LDFF1B_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00006000, + LD1SH_z_p_ai_s = 
SVE32BitGatherLoad_VectorPlusImmFixed | 0x00800000, + LDFF1SH_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00802000, + LD1H_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00804000, + LDFF1H_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00806000, + LD1W_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x01004000, + LDFF1W_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x01006000 +}; + +enum SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsOp { + SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed = 0x84200000, + SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08010, + SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E010, + PRFB_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed, + PRFH_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00002000, + PRFW_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00004000, + PRFD_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00006000 +}; + +enum SVE32BitGatherPrefetch_VectorPlusImmOp { + SVE32BitGatherPrefetch_VectorPlusImmFixed = 0x8400E000, + SVE32BitGatherPrefetch_VectorPlusImmFMask = 0xFE60E010, + SVE32BitGatherPrefetch_VectorPlusImmMask = 0xFFE0E010, + PRFB_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed, + PRFH_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x00800000, + PRFW_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x01000000, + PRFD_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x01800000 +}; + +enum SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsOp { + SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed = 0xE4608000, + SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFMask = 0xFE60A000, + SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask = 0xFFE0A000, + ST1H_z_p_bz_s_x32_scaled = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed | 0x00800000, + ST1W_z_p_bz_s_x32_scaled = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed | 0x01000000 +}; + +enum SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsOp { + SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed = 0xE4408000, + SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFMask = 0xFE60A000, + SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask = 0xFFE0A000, + ST1B_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed, + ST1H_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed | 0x00800000, + ST1W_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed | 0x01000000 +}; + +enum SVE32BitScatterStore_VectorPlusImmOp { + SVE32BitScatterStore_VectorPlusImmFixed = 0xE460A000, + SVE32BitScatterStore_VectorPlusImmFMask = 0xFE60E000, + SVE32BitScatterStore_VectorPlusImmMask = 0xFFE0E000, + ST1B_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed, + ST1H_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed | 0x00800000, + ST1W_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed | 0x01000000 +}; + +enum SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsOp { + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed = 0xC4200000, + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFMask = 0xFE208000, + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask = 0xFFA0E000, + LD1SH_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00800000, + LDFF1SH_z_p_bz_d_x32_scaled = 
SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00802000, + LD1H_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00804000, + LDFF1H_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00806000, + LD1SW_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01000000, + LDFF1SW_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01002000, + LD1W_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01004000, + LDFF1W_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01006000, + LD1D_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01804000, + LDFF1D_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01806000 +}; + +enum SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsOp { + SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed = 0xC4608000, + SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFMask = 0xFE608000, + SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E000, + LD1SH_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00800000, + LDFF1SH_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00802000, + LD1H_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00804000, + LDFF1H_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00806000, + LD1SW_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01000000, + LDFF1SW_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01002000, + LD1W_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01004000, + LDFF1W_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01006000, + LD1D_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01804000, + LDFF1D_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01806000 +}; + +enum SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsOp { + SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed = 0xC4408000, + SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFMask = 0xFE608000, + SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask = 0xFFE0E000, + LD1SB_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed, + LDFF1SB_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00002000, + LD1B_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00004000, + LDFF1B_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00006000, + LD1SH_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00800000, + LDFF1SH_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00802000, + LD1H_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00804000, + LDFF1H_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00806000, + LD1SW_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01000000, + LDFF1SW_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01002000, + LD1W_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01004000, + LDFF1W_z_p_bz_d_64_unscaled = 
SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01006000, + LD1D_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01804000, + LDFF1D_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01806000 +}; + +enum SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsOp { + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed = 0xC4000000, + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFMask = 0xFE208000, + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask = 0xFFA0E000, + LD1SB_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed, + LDFF1SB_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00002000, + LD1B_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00004000, + LDFF1B_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00006000, + LD1SH_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00800000, + LDFF1SH_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00802000, + LD1H_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00804000, + LDFF1H_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00806000, + LD1SW_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01000000, + LDFF1SW_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01002000, + LD1W_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01004000, + LDFF1W_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01006000, + LD1D_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01804000, + LDFF1D_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01806000 +}; + +enum SVE64BitGatherLoad_VectorPlusImmOp { + SVE64BitGatherLoad_VectorPlusImmFixed = 0xC4208000, + SVE64BitGatherLoad_VectorPlusImmFMask = 0xFE608000, + SVE64BitGatherLoad_VectorPlusImmMask = 0xFFE0E000, + LD1SB_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed, + LDFF1SB_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00002000, + LD1B_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00004000, + LDFF1B_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00006000, + LD1SH_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00800000, + LDFF1SH_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00802000, + LD1H_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00804000, + LDFF1H_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00806000, + LD1SW_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01000000, + LDFF1SW_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01002000, + LD1W_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01004000, + LDFF1W_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01006000, + LD1D_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01804000, + LDFF1D_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01806000 +}; + +enum SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsOp { + SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed = 0xC4608000, + SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFMask = 0xFFE08010, + 
SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E010, + PRFB_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed, + PRFH_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00002000, + PRFW_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00004000, + PRFD_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00006000 +}; + +enum SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsOp { + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed = 0xC4200000, + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFMask = 0xFFA08010, + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask = 0xFFA0E010, + PRFB_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed, + PRFH_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00002000, + PRFW_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00004000, + PRFD_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00006000 +}; + +enum SVE64BitGatherPrefetch_VectorPlusImmOp { + SVE64BitGatherPrefetch_VectorPlusImmFixed = 0xC400E000, + SVE64BitGatherPrefetch_VectorPlusImmFMask = 0xFE60E010, + SVE64BitGatherPrefetch_VectorPlusImmMask = 0xFFE0E010, + PRFB_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed, + PRFH_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x00800000, + PRFW_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x01000000, + PRFD_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x01800000 +}; + +enum SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsOp { + SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed = 0xE420A000, + SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFMask = 0xFE60E000, + SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E000, + ST1H_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x00800000, + ST1W_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x01000000, + ST1D_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x01800000 +}; + +enum SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsOp { + SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed = 0xE400A000, + SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFMask = 0xFE60E000, + SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask = 0xFFE0E000, + ST1B_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed, + ST1H_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x00800000, + ST1W_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x01000000, + ST1D_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x01800000 +}; + +enum SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsOp { + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed = 0xE4208000, + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFMask = 0xFE60A000, + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask = 0xFFE0A000, + ST1H_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00800000, + ST1W_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x01000000, + ST1D_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed 
| 0x01800000
+};
+
+enum SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsOp {
+  SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed = 0xE4008000,
+  SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFMask = 0xFE60A000,
+  SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask = 0xFFE0A000,
+  ST1B_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed,
+  ST1H_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00800000,
+  ST1W_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01000000,
+  ST1D_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01800000
+};
+
+enum SVE64BitScatterStore_VectorPlusImmOp {
+  SVE64BitScatterStore_VectorPlusImmFixed = 0xE440A000,
+  SVE64BitScatterStore_VectorPlusImmFMask = 0xFE60E000,
+  SVE64BitScatterStore_VectorPlusImmMask = 0xFFE0E000,
+  ST1B_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed,
+  ST1H_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x00800000,
+  ST1W_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x01000000,
+  ST1D_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x01800000
+};
+
+enum SVEAddressGenerationOp {
+  SVEAddressGenerationFixed = 0x0420A000,
+  SVEAddressGenerationFMask = 0xFF20F000,
+  SVEAddressGenerationMask = 0xFFE0F000,
+  ADR_z_az_d_s32_scaled = SVEAddressGenerationFixed,
+  ADR_z_az_d_u32_scaled = SVEAddressGenerationFixed | 0x00400000,
+  ADR_z_az_s_same_scaled = SVEAddressGenerationFixed | 0x00800000,
+  ADR_z_az_d_same_scaled = SVEAddressGenerationFixed | 0x00C00000
+};
+
+enum SVEBitwiseLogicalUnpredicatedOp {
+  SVEBitwiseLogicalUnpredicatedFixed = 0x04202000,
+  SVEBitwiseLogicalUnpredicatedFMask = 0xFF20E000,
+  SVEBitwiseLogicalUnpredicatedMask = 0xFFE0FC00,
+  AND_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00001000,
+  ORR_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00401000,
+  EOR_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00801000,
+  BIC_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00C01000
+};
+
+enum SVEBitwiseLogicalWithImm_UnpredicatedOp {
+  SVEBitwiseLogicalWithImm_UnpredicatedFixed = 0x05000000,
+  SVEBitwiseLogicalWithImm_UnpredicatedFMask = 0xFF3C0000,
+  SVEBitwiseLogicalWithImm_UnpredicatedMask = 0xFFFC0000,
+  ORR_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed,
+  EOR_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed | 0x00400000,
+  AND_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed | 0x00800000
+};
+
+enum SVEBitwiseLogical_PredicatedOp {
+  SVEBitwiseLogical_PredicatedFixed = 0x04180000,
+  SVEBitwiseLogical_PredicatedFMask = 0xFF38E000,
+  SVEBitwiseLogical_PredicatedMask = 0xFF3FE000,
+  ORR_z_p_zz = SVEBitwiseLogical_PredicatedFixed,
+  EOR_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00010000,
+  AND_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00020000,
+  BIC_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00030000
+};
+
+enum SVEBitwiseShiftByImm_PredicatedOp {
+  SVEBitwiseShiftByImm_PredicatedFixed = 0x04008000,
+  SVEBitwiseShiftByImm_PredicatedFMask = 0xFF30E000,
+  SVEBitwiseShiftByImm_PredicatedMask = 0xFF3FE000,
+  ASR_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed,
+  LSR_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00010000,
+  LSL_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00030000,
+  ASRD_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00040000
+};
+
+enum SVEBitwiseShiftByVector_PredicatedOp {
+  SVEBitwiseShiftByVector_PredicatedFixed = 0x04108000,
+  SVEBitwiseShiftByVector_PredicatedFMask = 0xFF38E000,
+  SVEBitwiseShiftByVector_PredicatedMask = 0xFF3FE000,
+  ASR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed,
+  LSR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00010000,
+  LSL_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00030000,
+  ASRR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00040000,
+  LSRR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00050000,
+  LSLR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00070000
+};
+
+enum SVEBitwiseShiftByWideElements_PredicatedOp {
+  SVEBitwiseShiftByWideElements_PredicatedFixed = 0x04188000,
+  SVEBitwiseShiftByWideElements_PredicatedFMask = 0xFF38E000,
+  SVEBitwiseShiftByWideElements_PredicatedMask = 0xFF3FE000,
+  ASR_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed,
+  LSR_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed | 0x00010000,
+  LSL_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed | 0x00030000
+};
+
+enum SVEBitwiseShiftUnpredicatedOp {
+  SVEBitwiseShiftUnpredicatedFixed = 0x04208000,
+  SVEBitwiseShiftUnpredicatedFMask = 0xFF20E000,
+  SVEBitwiseShiftUnpredicatedMask = 0xFF20FC00,
+  ASR_z_zw = SVEBitwiseShiftUnpredicatedFixed,
+  LSR_z_zw = SVEBitwiseShiftUnpredicatedFixed | 0x00000400,
+  LSL_z_zw = SVEBitwiseShiftUnpredicatedFixed | 0x00000C00,
+  ASR_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001000,
+  LSR_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001400,
+  LSL_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001C00
+};
+
+enum SVEBroadcastBitmaskImmOp {
+  SVEBroadcastBitmaskImmFixed = 0x05C00000,
+  SVEBroadcastBitmaskImmFMask = 0xFFFC0000,
+  SVEBroadcastBitmaskImmMask = 0xFFFC0000,
+  DUPM_z_i = SVEBroadcastBitmaskImmFixed
+};
+
+enum SVEBroadcastFPImm_UnpredicatedOp {
+  SVEBroadcastFPImm_UnpredicatedFixed = 0x2539C000,
+  SVEBroadcastFPImm_UnpredicatedFMask = 0xFF39C000,
+  SVEBroadcastFPImm_UnpredicatedMask = 0xFF3FE000,
+  FDUP_z_i = SVEBroadcastFPImm_UnpredicatedFixed
+};
+
+enum SVEBroadcastGeneralRegisterOp {
+  SVEBroadcastGeneralRegisterFixed = 0x05203800,
+  SVEBroadcastGeneralRegisterFMask = 0xFF3FFC00,
+  SVEBroadcastGeneralRegisterMask = 0xFF3FFC00,
+  DUP_z_r = SVEBroadcastGeneralRegisterFixed
+};
+
+enum SVEBroadcastIndexElementOp {
+  SVEBroadcastIndexElementFixed = 0x05202000,
+  SVEBroadcastIndexElementFMask = 0xFF20FC00,
+  SVEBroadcastIndexElementMask = 0xFF20FC00,
+  DUP_z_zi = SVEBroadcastIndexElementFixed
+};
+
+enum SVEBroadcastIntImm_UnpredicatedOp {
+  SVEBroadcastIntImm_UnpredicatedFixed = 0x2538C000,
+  SVEBroadcastIntImm_UnpredicatedFMask = 0xFF39C000,
+  SVEBroadcastIntImm_UnpredicatedMask = 0xFF3FC000,
+  DUP_z_i = SVEBroadcastIntImm_UnpredicatedFixed
+};
+
+enum SVECompressActiveElementsOp {
+  SVECompressActiveElementsFixed = 0x05A18000,
+  SVECompressActiveElementsFMask = 0xFFBFE000,
+  SVECompressActiveElementsMask = 0xFFBFE000,
+  COMPACT_z_p_z = SVECompressActiveElementsFixed
+};
+
+enum SVEConditionallyBroadcastElementToVectorOp {
+  SVEConditionallyBroadcastElementToVectorFixed = 0x05288000,
+  SVEConditionallyBroadcastElementToVectorFMask = 0xFF3EE000,
+  SVEConditionallyBroadcastElementToVectorMask = 0xFF3FE000,
+  CLASTA_z_p_zz = SVEConditionallyBroadcastElementToVectorFixed,
+  CLASTB_z_p_zz = SVEConditionallyBroadcastElementToVectorFixed | 0x00010000
+};
+
+enum SVEConditionallyExtractElementToGeneralRegisterOp {
+  SVEConditionallyExtractElementToGeneralRegisterFixed = 0x0530A000,
+  SVEConditionallyExtractElementToGeneralRegisterFMask = 0xFF3EE000,
+  SVEConditionallyExtractElementToGeneralRegisterMask = 0xFF3FE000,
+  CLASTA_r_p_z = SVEConditionallyExtractElementToGeneralRegisterFixed,
+  CLASTB_r_p_z = SVEConditionallyExtractElementToGeneralRegisterFixed | 0x00010000
+};
+
+enum SVEConditionallyExtractElementToSIMDFPScalarOp {
+  SVEConditionallyExtractElementToSIMDFPScalarFixed = 0x052A8000,
+  SVEConditionallyExtractElementToSIMDFPScalarFMask = 0xFF3EE000,
+  SVEConditionallyExtractElementToSIMDFPScalarMask = 0xFF3FE000,
+  CLASTA_v_p_z = SVEConditionallyExtractElementToSIMDFPScalarFixed,
+  CLASTB_v_p_z = SVEConditionallyExtractElementToSIMDFPScalarFixed | 0x00010000
+};
+
+enum SVEConditionallyTerminateScalarsOp {
+  SVEConditionallyTerminateScalarsFixed = 0x25202000,
+  SVEConditionallyTerminateScalarsFMask = 0xFF20FC0F,
+  SVEConditionallyTerminateScalarsMask = 0xFFA0FC1F,
+  CTERMEQ_rr = SVEConditionallyTerminateScalarsFixed | 0x00800000,
+  CTERMNE_rr = SVEConditionallyTerminateScalarsFixed | 0x00800010
+};
+
+enum SVEConstructivePrefix_UnpredicatedOp {
+  SVEConstructivePrefix_UnpredicatedFixed = 0x0420BC00,
+  SVEConstructivePrefix_UnpredicatedFMask = 0xFF20FC00,
+  SVEConstructivePrefix_UnpredicatedMask = 0xFFFFFC00,
+  MOVPRFX_z_z = SVEConstructivePrefix_UnpredicatedFixed
+};
+
+enum SVEContiguousFirstFaultLoad_ScalarPlusScalarOp {
+  SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed = 0xA4006000,
+  SVEContiguousFirstFaultLoad_ScalarPlusScalarFMask = 0xFE00E000,
+  SVEContiguousFirstFaultLoad_ScalarPlusScalarMask = 0xFFE0E000,
+  LDFF1B_z_p_br_u8 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed,
+  LDFF1B_z_p_br_u16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00200000,
+  LDFF1B_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00400000,
+  LDFF1B_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00600000,
+  LDFF1SW_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00800000,
+  LDFF1H_z_p_br_u16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00A00000,
+  LDFF1H_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00C00000,
+  LDFF1H_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00E00000,
+  LDFF1SH_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01000000,
+  LDFF1SH_z_p_br_s32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01200000,
+  LDFF1W_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01400000,
+  LDFF1W_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01600000,
+  LDFF1SB_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01800000,
+  LDFF1SB_z_p_br_s32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01A00000,
+  LDFF1SB_z_p_br_s16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01C00000,
+  LDFF1D_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVEContiguousLoad_ScalarPlusImmOp {
+  SVEContiguousLoad_ScalarPlusImmFixed = 0xA400A000,
+  SVEContiguousLoad_ScalarPlusImmFMask = 0xFE10E000,
+  SVEContiguousLoad_ScalarPlusImmMask = 0xFFF0E000,
+  LD1B_z_p_bi_u8 = SVEContiguousLoad_ScalarPlusImmFixed,
+  LD1B_z_p_bi_u16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00200000,
+  LD1B_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00400000,
+  LD1B_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00600000,
+  LD1SW_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00800000,
+  LD1H_z_p_bi_u16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00A00000,
+  LD1H_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00C00000,
+  LD1H_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00E00000,
+  LD1SH_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01000000,
+  LD1SH_z_p_bi_s32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01200000,
+  LD1W_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01400000,
+  LD1W_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01600000,
+  LD1SB_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01800000,
+  LD1SB_z_p_bi_s32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01A00000,
+  LD1SB_z_p_bi_s16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01C00000,
+  LD1D_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVEContiguousLoad_ScalarPlusScalarOp {
+  SVEContiguousLoad_ScalarPlusScalarFixed = 0xA4004000,
+  SVEContiguousLoad_ScalarPlusScalarFMask = 0xFE00E000,
+  SVEContiguousLoad_ScalarPlusScalarMask = 0xFFE0E000,
+  LD1B_z_p_br_u8 = SVEContiguousLoad_ScalarPlusScalarFixed,
+  LD1B_z_p_br_u16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00200000,
+  LD1B_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00400000,
+  LD1B_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00600000,
+  LD1SW_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00800000,
+  LD1H_z_p_br_u16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00A00000,
+  LD1H_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00C00000,
+  LD1H_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00E00000,
+  LD1SH_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01000000,
+  LD1SH_z_p_br_s32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01200000,
+  LD1W_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01400000,
+  LD1W_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01600000,
+  LD1SB_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01800000,
+  LD1SB_z_p_br_s32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01A00000,
+  LD1SB_z_p_br_s16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01C00000,
+  LD1D_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVEContiguousNonFaultLoad_ScalarPlusImmOp {
+  SVEContiguousNonFaultLoad_ScalarPlusImmFixed = 0xA410A000,
+  SVEContiguousNonFaultLoad_ScalarPlusImmFMask = 0xFE10E000,
+  SVEContiguousNonFaultLoad_ScalarPlusImmMask = 0xFFF0E000,
+  LDNF1B_z_p_bi_u8 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed,
+  LDNF1B_z_p_bi_u16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00200000,
+  LDNF1B_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00400000,
+  LDNF1B_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00600000,
+  LDNF1SW_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00800000,
+  LDNF1H_z_p_bi_u16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00A00000,
+  LDNF1H_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00C00000,
+  LDNF1H_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00E00000,
+  LDNF1SH_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01000000,
+  LDNF1SH_z_p_bi_s32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01200000,
+  LDNF1W_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01400000,
+  LDNF1W_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01600000,
+  LDNF1SB_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01800000,
+  LDNF1SB_z_p_bi_s32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01A00000,
+  LDNF1SB_z_p_bi_s16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01C00000,
+  LDNF1D_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVEContiguousNonTemporalLoad_ScalarPlusImmOp {
+  SVEContiguousNonTemporalLoad_ScalarPlusImmFixed = 0xA400E000,
+  SVEContiguousNonTemporalLoad_ScalarPlusImmFMask = 0xFE70E000,
+  SVEContiguousNonTemporalLoad_ScalarPlusImmMask = 0xFFF0E000,
+  LDNT1B_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed,
+  LDNT1H_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x00800000,
+  LDNT1W_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x01000000,
+  LDNT1D_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVEContiguousNonTemporalLoad_ScalarPlusScalarOp {
+  SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed = 0xA400C000,
+  SVEContiguousNonTemporalLoad_ScalarPlusScalarFMask = 0xFE60E000,
+  SVEContiguousNonTemporalLoad_ScalarPlusScalarMask = 0xFFE0E000,
+  LDNT1B_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed,
+  LDNT1H_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x00800000,
+  LDNT1W_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x01000000,
+  LDNT1D_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVEContiguousNonTemporalStore_ScalarPlusImmOp {
+  SVEContiguousNonTemporalStore_ScalarPlusImmFixed = 0xE410E000,
+  SVEContiguousNonTemporalStore_ScalarPlusImmFMask = 0xFE70E000,
+  SVEContiguousNonTemporalStore_ScalarPlusImmMask = 0xFFF0E000,
+  STNT1B_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed,
+  STNT1H_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x00800000,
+  STNT1W_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x01000000,
+  STNT1D_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVEContiguousNonTemporalStore_ScalarPlusScalarOp {
+  SVEContiguousNonTemporalStore_ScalarPlusScalarFixed = 0xE4006000,
+  SVEContiguousNonTemporalStore_ScalarPlusScalarFMask = 0xFE60E000,
+  SVEContiguousNonTemporalStore_ScalarPlusScalarMask = 0xFFE0E000,
+  STNT1B_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed,
+  STNT1H_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x00800000,
+  STNT1W_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x01000000,
+  STNT1D_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVEContiguousPrefetch_ScalarPlusImmOp {
+  SVEContiguousPrefetch_ScalarPlusImmFixed = 0x85C00000,
+  SVEContiguousPrefetch_ScalarPlusImmFMask = 0xFFC08010,
+  SVEContiguousPrefetch_ScalarPlusImmMask = 0xFFC0E010,
+  PRFB_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed,
+  PRFH_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00002000,
+  PRFW_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00004000,
+  PRFD_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00006000
+};
+
+enum SVEContiguousPrefetch_ScalarPlusScalarOp {
+  SVEContiguousPrefetch_ScalarPlusScalarFixed = 0x8400C000,
+  SVEContiguousPrefetch_ScalarPlusScalarFMask = 0xFE60E010,
+  SVEContiguousPrefetch_ScalarPlusScalarMask = 0xFFE0E010,
+  PRFB_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed,
+  PRFH_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x00800000,
+  PRFW_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x01000000,
+  PRFD_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVEContiguousStore_ScalarPlusImmOp {
+  SVEContiguousStore_ScalarPlusImmFixed = 0xE400E000,
+  SVEContiguousStore_ScalarPlusImmFMask = 0xFE10E000,
+  SVEContiguousStore_ScalarPlusImmMask = 0xFF90E000,
+  ST1B_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed,
+  ST1H_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x00800000,
+  ST1W_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x01000000,
+  ST1D_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVEContiguousStore_ScalarPlusScalarOp {
+  SVEContiguousStore_ScalarPlusScalarFixed = 0xE4004000,
+  SVEContiguousStore_ScalarPlusScalarFMask = 0xFE00E000,
+  SVEContiguousStore_ScalarPlusScalarMask = 0xFF80E000,
+  ST1B_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed,
+  ST1H_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x00800000,
+  ST1W_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x01000000,
+  ST1D_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVECopyFPImm_PredicatedOp {
+  SVECopyFPImm_PredicatedFixed = 0x0510C000,
+  SVECopyFPImm_PredicatedFMask = 0xFF30E000,
+  SVECopyFPImm_PredicatedMask = 0xFF30E000,
+  FCPY_z_p_i = SVECopyFPImm_PredicatedFixed
+};
+
+enum SVECopyGeneralRegisterToVector_PredicatedOp {
+  SVECopyGeneralRegisterToVector_PredicatedFixed = 0x0528A000,
+  SVECopyGeneralRegisterToVector_PredicatedFMask = 0xFF3FE000,
+  SVECopyGeneralRegisterToVector_PredicatedMask = 0xFF3FE000,
+  CPY_z_p_r = SVECopyGeneralRegisterToVector_PredicatedFixed
+};
+
+enum SVECopyIntImm_PredicatedOp {
+  SVECopyIntImm_PredicatedFixed = 0x05100000,
+  SVECopyIntImm_PredicatedFMask = 0xFF308000,
+  SVECopyIntImm_PredicatedMask = 0xFF308000,
+  CPY_z_p_i = SVECopyIntImm_PredicatedFixed
+};
+
+enum SVECopySIMDFPScalarRegisterToVector_PredicatedOp {
+  SVECopySIMDFPScalarRegisterToVector_PredicatedFixed = 0x05208000,
+  SVECopySIMDFPScalarRegisterToVector_PredicatedFMask = 0xFF3FE000,
+  SVECopySIMDFPScalarRegisterToVector_PredicatedMask = 0xFF3FE000,
+  CPY_z_p_v = SVECopySIMDFPScalarRegisterToVector_PredicatedFixed
+};
+
+enum SVEElementCountOp {
+  SVEElementCountFixed = 0x0420E000,
+  SVEElementCountFMask = 0xFF30F800,
+  SVEElementCountMask = 0xFFF0FC00,
+  CNTB_r_s = SVEElementCountFixed,
+  CNTH_r_s = SVEElementCountFixed | 0x00400000,
+  CNTW_r_s = SVEElementCountFixed | 0x00800000,
+  CNTD_r_s = SVEElementCountFixed | 0x00C00000
+};
+
+enum SVEExtractElementToGeneralRegisterOp {
+  SVEExtractElementToGeneralRegisterFixed = 0x0520A000,
+  SVEExtractElementToGeneralRegisterFMask = 0xFF3EE000,
+  SVEExtractElementToGeneralRegisterMask = 0xFF3FE000,
+  LASTA_r_p_z = SVEExtractElementToGeneralRegisterFixed,
+  LASTB_r_p_z = SVEExtractElementToGeneralRegisterFixed | 0x00010000
+};
+
+enum SVEExtractElementToSIMDFPScalarRegisterOp {
+  SVEExtractElementToSIMDFPScalarRegisterFixed = 0x05228000,
+  SVEExtractElementToSIMDFPScalarRegisterFMask = 0xFF3EE000,
+  SVEExtractElementToSIMDFPScalarRegisterMask = 0xFF3FE000,
+  LASTA_v_p_z = SVEExtractElementToSIMDFPScalarRegisterFixed,
+  LASTB_v_p_z = SVEExtractElementToSIMDFPScalarRegisterFixed | 0x00010000
+};
+
+enum SVEFFRInitialiseOp {
+  SVEFFRInitialiseFixed = 0x252C9000,
+  SVEFFRInitialiseFMask = 0xFF3FFFFF,
+  SVEFFRInitialiseMask = 0xFFFFFFFF,
+  SETFFR_f = SVEFFRInitialiseFixed
+};
+
+enum SVEFFRWriteFromPredicateOp {
+  SVEFFRWriteFromPredicateFixed = 0x25289000,
+  SVEFFRWriteFromPredicateFMask = 0xFF3FFE1F,
+  SVEFFRWriteFromPredicateMask = 0xFFFFFE1F,
+  WRFFR_f_p = SVEFFRWriteFromPredicateFixed
+};
+
+enum SVEFPAccumulatingReductionOp {
+  SVEFPAccumulatingReductionFixed = 0x65182000,
+  SVEFPAccumulatingReductionFMask = 0xFF38E000,
+  SVEFPAccumulatingReductionMask = 0xFF3FE000,
+  FADDA_v_p_z = SVEFPAccumulatingReductionFixed
+};
+
+enum SVEFPArithmeticUnpredicatedOp {
+  SVEFPArithmeticUnpredicatedFixed = 0x65000000,
+  SVEFPArithmeticUnpredicatedFMask = 0xFF20E000,
+  SVEFPArithmeticUnpredicatedMask = 0xFF20FC00,
+  FADD_z_zz = SVEFPArithmeticUnpredicatedFixed,
+  FSUB_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000400,
+  FMUL_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000800,
+  FTSMUL_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000C00,
+  FRECPS_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00001800,
+  FRSQRTS_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00001C00
+};
+
+enum SVEFPArithmeticWithImm_PredicatedOp {
+  SVEFPArithmeticWithImm_PredicatedFixed = 0x65188000,
+  SVEFPArithmeticWithImm_PredicatedFMask = 0xFF38E3C0,
+  SVEFPArithmeticWithImm_PredicatedMask = 0xFF3FE3C0,
+  FADD_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed,
+  FSUB_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00010000,
+  FMUL_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00020000,
+  FSUBR_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00030000,
+  FMAXNM_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00040000,
+  FMINNM_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00050000,
+  FMAX_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00060000,
+  FMIN_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00070000
+};
+
+enum SVEFPArithmetic_PredicatedOp {
+  SVEFPArithmetic_PredicatedFixed = 0x65008000,
+  SVEFPArithmetic_PredicatedFMask = 0xFF30E000,
+  SVEFPArithmetic_PredicatedMask = 0xFF3FE000,
+  FADD_z_p_zz = SVEFPArithmetic_PredicatedFixed,
+  FSUB_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00010000,
+  FMUL_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00020000,
+  FSUBR_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00030000,
+  FMAXNM_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00040000,
+  FMINNM_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00050000,
+  FMAX_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00060000,
+  FMIN_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00070000,
+  FABD_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00080000,
+  FSCALE_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00090000,
+  FMULX_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000A0000,
+  FDIVR_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000C0000,
+  FDIV_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000D0000
+};
+
+enum SVEFPCompareVectorsOp {
+  SVEFPCompareVectorsFixed = 0x65004000,
+  SVEFPCompareVectorsFMask = 0xFF204000,
+  SVEFPCompareVectorsMask = 0xFF20E010,
+  FCMGE_p_p_zz = SVEFPCompareVectorsFixed,
+  FCMGT_p_p_zz = SVEFPCompareVectorsFixed | 0x00000010,
+  FCMEQ_p_p_zz = SVEFPCompareVectorsFixed | 0x00002000,
+  FCMNE_p_p_zz = SVEFPCompareVectorsFixed | 0x00002010,
+  FCMUO_p_p_zz = SVEFPCompareVectorsFixed | 0x00008000,
+  FACGE_p_p_zz = SVEFPCompareVectorsFixed | 0x00008010,
+  FACGT_p_p_zz = SVEFPCompareVectorsFixed | 0x0000A010
+};
+
+enum SVEFPCompareWithZeroOp {
+  SVEFPCompareWithZeroFixed = 0x65102000,
+  SVEFPCompareWithZeroFMask = 0xFF38E000,
+  SVEFPCompareWithZeroMask = 0xFF3FE010,
+  FCMGE_p_p_z0 = SVEFPCompareWithZeroFixed,
+  FCMGT_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00000010,
+  FCMLT_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00010000,
+  FCMLE_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00010010,
+  FCMEQ_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00020000,
+  FCMNE_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00030000
+};
+
+enum SVEFPComplexAdditionOp {
+  SVEFPComplexAdditionFixed = 0x64008000,
+  SVEFPComplexAdditionFMask = 0xFF3EE000,
+  SVEFPComplexAdditionMask = 0xFF3EE000,
+  FCADD_z_p_zz = SVEFPComplexAdditionFixed
+};
+
+enum SVEFPComplexMulAddOp {
+  SVEFPComplexMulAddFixed = 0x64000000,
+  SVEFPComplexMulAddFMask = 0xFF208000,
+  SVEFPComplexMulAddMask = 0xFF208000,
+  FCMLA_z_p_zzz = SVEFPComplexMulAddFixed
+};
+
+enum SVEFPComplexMulAddIndexOp {
+  SVEFPComplexMulAddIndexFixed = 0x64201000,
+  SVEFPComplexMulAddIndexFMask = 0xFF20F000,
+  SVEFPComplexMulAddIndexMask = 0xFFE0F000,
+  FCMLA_z_zzzi_h = SVEFPComplexMulAddIndexFixed | 0x00800000,
+  FCMLA_z_zzzi_s = SVEFPComplexMulAddIndexFixed | 0x00C00000
+};
+
+enum SVEFPConvertPrecisionOp {
+  SVEFPConvertPrecisionFixed = 0x6508A000,
+  SVEFPConvertPrecisionFMask = 0xFF3CE000,
+  SVEFPConvertPrecisionMask = 0xFFFFE000,
+  FCVT_z_p_z_s2h = SVEFPConvertPrecisionFixed | 0x00800000,
+  FCVT_z_p_z_h2s = SVEFPConvertPrecisionFixed | 0x00810000,
+  FCVT_z_p_z_d2h = SVEFPConvertPrecisionFixed | 0x00C00000,
+  FCVT_z_p_z_h2d = SVEFPConvertPrecisionFixed | 0x00C10000,
+  FCVT_z_p_z_d2s = SVEFPConvertPrecisionFixed | 0x00C20000,
+  FCVT_z_p_z_s2d = SVEFPConvertPrecisionFixed | 0x00C30000
+};
+
+enum SVEFPConvertToIntOp {
+  SVEFPConvertToIntFixed = 0x6518A000,
+  SVEFPConvertToIntFMask = 0xFF38E000,
+  SVEFPConvertToIntMask = 0xFFFFE000,
+  FCVTZS_z_p_z_fp162h = SVEFPConvertToIntFixed | 0x00420000,
+  FCVTZU_z_p_z_fp162h = SVEFPConvertToIntFixed | 0x00430000,
+  FCVTZS_z_p_z_fp162w = SVEFPConvertToIntFixed | 0x00440000,
+  FCVTZU_z_p_z_fp162w = SVEFPConvertToIntFixed | 0x00450000,
+  FCVTZS_z_p_z_fp162x = SVEFPConvertToIntFixed | 0x00460000,
+  FCVTZU_z_p_z_fp162x = SVEFPConvertToIntFixed | 0x00470000,
+  FCVTZS_z_p_z_s2w = SVEFPConvertToIntFixed | 0x00840000,
+  FCVTZU_z_p_z_s2w = SVEFPConvertToIntFixed | 0x00850000,
+  FCVTZS_z_p_z_d2w = SVEFPConvertToIntFixed | 0x00C00000,
+  FCVTZU_z_p_z_d2w = SVEFPConvertToIntFixed | 0x00C10000,
+  FCVTZS_z_p_z_s2x = SVEFPConvertToIntFixed | 0x00C40000,
+  FCVTZU_z_p_z_s2x = SVEFPConvertToIntFixed | 0x00C50000,
+  FCVTZS_z_p_z_d2x = SVEFPConvertToIntFixed | 0x00C60000,
+  FCVTZU_z_p_z_d2x = SVEFPConvertToIntFixed | 0x00C70000
+};
+
+enum SVEFPExponentialAcceleratorOp {
+  SVEFPExponentialAcceleratorFixed = 0x0420B800,
+  SVEFPExponentialAcceleratorFMask = 0xFF20FC00,
+  SVEFPExponentialAcceleratorMask = 0xFF3FFC00,
+  FEXPA_z_z = SVEFPExponentialAcceleratorFixed
+};
+
+enum SVEFPFastReductionOp {
+  SVEFPFastReductionFixed = 0x65002000,
+  SVEFPFastReductionFMask = 0xFF38E000,
+  SVEFPFastReductionMask = 0xFF3FE000,
+  FADDV_v_p_z = SVEFPFastReductionFixed,
+  FMAXNMV_v_p_z = SVEFPFastReductionFixed | 0x00040000,
+  FMINNMV_v_p_z = SVEFPFastReductionFixed | 0x00050000,
+  FMAXV_v_p_z = SVEFPFastReductionFixed | 0x00060000,
+  FMINV_v_p_z = SVEFPFastReductionFixed | 0x00070000
+};
+
+enum SVEFPMulAddOp {
+  SVEFPMulAddFixed = 0x65200000,
+  SVEFPMulAddFMask = 0xFF200000,
+  SVEFPMulAddMask = 0xFF20E000,
+  FMLA_z_p_zzz = SVEFPMulAddFixed,
+  FMLS_z_p_zzz = SVEFPMulAddFixed | 0x00002000,
+  FNMLA_z_p_zzz = SVEFPMulAddFixed | 0x00004000,
+  FNMLS_z_p_zzz = SVEFPMulAddFixed | 0x00006000,
+  FMAD_z_p_zzz = SVEFPMulAddFixed | 0x00008000,
+  FMSB_z_p_zzz = SVEFPMulAddFixed | 0x0000A000,
+  FNMAD_z_p_zzz = SVEFPMulAddFixed | 0x0000C000,
+  FNMSB_z_p_zzz = SVEFPMulAddFixed | 0x0000E000
+};
+
+enum SVEFPMulAddIndexOp {
+  SVEFPMulAddIndexFixed = 0x64200000,
+  SVEFPMulAddIndexFMask = 0xFF20F800,
+  SVEFPMulAddIndexMask = 0xFFE0FC00,
+  FMLA_z_zzzi_h = SVEFPMulAddIndexFixed,
+  FMLA_z_zzzi_h_i3h = FMLA_z_zzzi_h | 0x00400000,
+  FMLS_z_zzzi_h = SVEFPMulAddIndexFixed | 0x00000400,
+  FMLS_z_zzzi_h_i3h = FMLS_z_zzzi_h | 0x00400000,
+  FMLA_z_zzzi_s = SVEFPMulAddIndexFixed | 0x00800000,
+  FMLS_z_zzzi_s = SVEFPMulAddIndexFixed | 0x00800400,
+  FMLA_z_zzzi_d = SVEFPMulAddIndexFixed | 0x00C00000,
+  FMLS_z_zzzi_d = SVEFPMulAddIndexFixed | 0x00C00400
+};
+
+enum SVEFPMulIndexOp {
+  SVEFPMulIndexFixed = 0x64202000,
+  SVEFPMulIndexFMask = 0xFF20FC00,
+  SVEFPMulIndexMask = 0xFFE0FC00,
+  FMUL_z_zzi_h = SVEFPMulIndexFixed,
+  FMUL_z_zzi_h_i3h = FMUL_z_zzi_h | 0x00400000,
+  FMUL_z_zzi_s = SVEFPMulIndexFixed | 0x00800000,
+  FMUL_z_zzi_d = SVEFPMulIndexFixed | 0x00C00000
+};
+
+enum SVEFPRoundToIntegralValueOp {
+  SVEFPRoundToIntegralValueFixed = 0x6500A000,
+  SVEFPRoundToIntegralValueFMask = 0xFF38E000,
+  SVEFPRoundToIntegralValueMask = 0xFF3FE000,
+  FRINTN_z_p_z = SVEFPRoundToIntegralValueFixed,
+  FRINTP_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00010000,
+  FRINTM_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00020000,
+  FRINTZ_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00030000,
+  FRINTA_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00040000,
+  FRINTX_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00060000,
+  FRINTI_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00070000
+};
+
+enum SVEFPTrigMulAddCoefficientOp {
+  SVEFPTrigMulAddCoefficientFixed = 0x65108000,
+  SVEFPTrigMulAddCoefficientFMask = 0xFF38FC00,
+  SVEFPTrigMulAddCoefficientMask = 0xFF38FC00,
+  FTMAD_z_zzi = SVEFPTrigMulAddCoefficientFixed
+};
+
+enum SVEFPTrigSelectCoefficientOp {
+  SVEFPTrigSelectCoefficientFixed = 0x0420B000,
+  SVEFPTrigSelectCoefficientFMask = 0xFF20F800,
+  SVEFPTrigSelectCoefficientMask = 0xFF20FC00,
+  FTSSEL_z_zz = SVEFPTrigSelectCoefficientFixed
+};
+
+enum SVEFPUnaryOpOp {
+  SVEFPUnaryOpFixed = 0x650CA000,
+  SVEFPUnaryOpFMask = 0xFF3CE000,
+  SVEFPUnaryOpMask = 0xFF3FE000,
+  FRECPX_z_p_z = SVEFPUnaryOpFixed,
+  FSQRT_z_p_z = SVEFPUnaryOpFixed | 0x00010000
+};
+
+enum SVEFPUnaryOpUnpredicatedOp {
+  SVEFPUnaryOpUnpredicatedFixed = 0x65083000,
+  SVEFPUnaryOpUnpredicatedFMask = 0xFF38F000,
+  SVEFPUnaryOpUnpredicatedMask = 0xFF3FFC00,
+  FRECPE_z_z = SVEFPUnaryOpUnpredicatedFixed | 0x00060000,
+  FRSQRTE_z_z = SVEFPUnaryOpUnpredicatedFixed | 0x00070000
+};
+
+enum SVEIncDecByPredicateCountOp {
+  SVEIncDecByPredicateCountFixed = 0x25288000,
+  SVEIncDecByPredicateCountFMask = 0xFF38F000,
+  SVEIncDecByPredicateCountMask = 0xFF3FFE00,
+  SQINCP_z_p_z = SVEIncDecByPredicateCountFixed,
+  SQINCP_r_p_r_sx = SVEIncDecByPredicateCountFixed | 0x00000800,
+  SQINCP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00000C00,
+  UQINCP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00010000,
+  UQINCP_r_p_r_uw = SVEIncDecByPredicateCountFixed | 0x00010800,
+  UQINCP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00010C00,
+  SQDECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00020000,
+  SQDECP_r_p_r_sx = SVEIncDecByPredicateCountFixed | 0x00020800,
+  SQDECP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00020C00,
+  UQDECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00030000,
+  UQDECP_r_p_r_uw = SVEIncDecByPredicateCountFixed | 0x00030800,
+  UQDECP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00030C00,
+  INCP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00040000,
+  INCP_r_p_r = SVEIncDecByPredicateCountFixed | 0x00040800,
+  DECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00050000,
+  DECP_r_p_r = SVEIncDecByPredicateCountFixed | 0x00050800
+};
+
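Each group in these tables pairs a Fixed value (the group's constant bits) with the operand fields an emitter fills in. As a rough sketch of how one of the constants above could be turned into a full encoding, assuming the usual A64/SVE operand layout (Zd at [4:0], Zn at [9:5], Zm at [20:16], element size at [23:22]) which is not itself stated by these tables:

// Sketch only: combines FADD_z_zz (SVEFPArithmeticUnpredicatedFixed, from the
// table above) with operand fields. The field positions are an assumption
// based on the common SVE layout, not part of the enum definitions.
#include <cstdint>

uint32_t EncodeFaddZZZ(uint32_t zd, uint32_t zn, uint32_t zm, uint32_t size) {
  const uint32_t kFADD_z_zz = 0x65000000;
  return kFADD_z_zz | (size << 22)  // 01: half, 10: single, 11: double.
                    | (zm << 16) | (zn << 5) | zd;
}
// EncodeFaddZZZ(0, 1, 2, 3) == 0x65C20020, i.e. "fadd z0.d, z1.d, z2.d".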
+enum SVEIncDecRegisterByElementCountOp {
+  SVEIncDecRegisterByElementCountFixed = 0x0430E000,
+  SVEIncDecRegisterByElementCountFMask = 0xFF30F800,
+  SVEIncDecRegisterByElementCountMask = 0xFFF0FC00,
+  INCB_r_rs = SVEIncDecRegisterByElementCountFixed,
+  DECB_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00000400,
+  INCH_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00400000,
+  DECH_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00400400,
+  INCW_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00800000,
+  DECW_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00800400,
+  INCD_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00C00000,
+  DECD_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00C00400
+};
+
+enum SVEIncDecVectorByElementCountOp {
+  SVEIncDecVectorByElementCountFixed = 0x0430C000,
+  SVEIncDecVectorByElementCountFMask = 0xFF30F800,
+  SVEIncDecVectorByElementCountMask = 0xFFF0FC00,
+  INCH_z_zs = SVEIncDecVectorByElementCountFixed | 0x00400000,
+  DECH_z_zs = SVEIncDecVectorByElementCountFixed | 0x00400400,
+  INCW_z_zs = SVEIncDecVectorByElementCountFixed | 0x00800000,
+  DECW_z_zs = SVEIncDecVectorByElementCountFixed | 0x00800400,
+  INCD_z_zs = SVEIncDecVectorByElementCountFixed | 0x00C00000,
+  DECD_z_zs = SVEIncDecVectorByElementCountFixed | 0x00C00400
+};
+
+enum SVEIndexGenerationOp {
+  SVEIndexGenerationFixed = 0x04204000,
+  SVEIndexGenerationFMask = 0xFF20F000,
+  SVEIndexGenerationMask = 0xFF20FC00,
+  INDEX_z_ii = SVEIndexGenerationFixed,
+  INDEX_z_ri = SVEIndexGenerationFixed | 0x00000400,
+  INDEX_z_ir = SVEIndexGenerationFixed | 0x00000800,
+  INDEX_z_rr = SVEIndexGenerationFixed | 0x00000C00
+};
+
+enum SVEInsertGeneralRegisterOp {
+  SVEInsertGeneralRegisterFixed = 0x05243800,
+  SVEInsertGeneralRegisterFMask = 0xFF3FFC00,
+  SVEInsertGeneralRegisterMask = 0xFF3FFC00,
+  INSR_z_r = SVEInsertGeneralRegisterFixed
+};
+
+enum SVEInsertSIMDFPScalarRegisterOp {
+  SVEInsertSIMDFPScalarRegisterFixed = 0x05343800,
+  SVEInsertSIMDFPScalarRegisterFMask = 0xFF3FFC00,
+  SVEInsertSIMDFPScalarRegisterMask = 0xFF3FFC00,
+  INSR_z_v = SVEInsertSIMDFPScalarRegisterFixed
+};
+
+enum SVEIntAddSubtractImm_UnpredicatedOp {
+  SVEIntAddSubtractImm_UnpredicatedFixed = 0x2520C000,
+  SVEIntAddSubtractImm_UnpredicatedFMask = 0xFF38C000,
+  SVEIntAddSubtractImm_UnpredicatedMask = 0xFF3FC000,
+  ADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed,
+  SUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00010000,
+  SUBR_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00030000,
+  SQADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00040000,
+  UQADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00050000,
+  SQSUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00060000,
+  UQSUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00070000
+};
+
+enum SVEIntAddSubtractVectors_PredicatedOp {
+  SVEIntAddSubtractVectors_PredicatedFixed = 0x04000000,
+  SVEIntAddSubtractVectors_PredicatedFMask = 0xFF38E000,
+  SVEIntAddSubtractVectors_PredicatedMask = 0xFF3FE000,
+  ADD_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed,
+  SUB_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed | 0x00010000,
+  SUBR_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed | 0x00030000
+};
+
+enum SVEIntArithmeticUnpredicatedOp {
+  SVEIntArithmeticUnpredicatedFixed = 0x04200000,
+  SVEIntArithmeticUnpredicatedFMask = 0xFF20E000,
+  SVEIntArithmeticUnpredicatedMask = 0xFF20FC00,
+  ADD_z_zz = SVEIntArithmeticUnpredicatedFixed,
+  SUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00000400,
+  SQADD_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001000,
+  UQADD_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001400,
+  SQSUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001800,
+  UQSUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001C00
+};
+
+enum SVEIntCompareScalarCountAndLimitOp {
+  SVEIntCompareScalarCountAndLimitFixed = 0x25200000,
+  SVEIntCompareScalarCountAndLimitFMask = 0xFF20E000,
+  SVEIntCompareScalarCountAndLimitMask = 0xFF20EC10,
+  WHILELT_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000400,
+  WHILELE_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000410,
+  WHILELO_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000C00,
+  WHILELS_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000C10
+};
+
+enum SVEIntCompareSignedImmOp {
+  SVEIntCompareSignedImmFixed = 0x25000000,
+  SVEIntCompareSignedImmFMask = 0xFF204000,
+  SVEIntCompareSignedImmMask = 0xFF20E010,
+  CMPGE_p_p_zi = SVEIntCompareSignedImmFixed,
+  CMPGT_p_p_zi = SVEIntCompareSignedImmFixed | 0x00000010,
+  CMPLT_p_p_zi = SVEIntCompareSignedImmFixed | 0x00002000,
+  CMPLE_p_p_zi = SVEIntCompareSignedImmFixed | 0x00002010,
+  CMPEQ_p_p_zi = SVEIntCompareSignedImmFixed | 0x00008000,
+  CMPNE_p_p_zi = SVEIntCompareSignedImmFixed | 0x00008010
+};
+
+enum SVEIntCompareUnsignedImmOp {
+  SVEIntCompareUnsignedImmFixed = 0x24200000,
+  SVEIntCompareUnsignedImmFMask = 0xFF200000,
+  SVEIntCompareUnsignedImmMask = 0xFF202010,
+  CMPHS_p_p_zi = SVEIntCompareUnsignedImmFixed,
+  CMPHI_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00000010,
+  CMPLO_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00002000,
+  CMPLS_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00002010
+};
+
+enum SVEIntCompareVectorsOp {
+  SVEIntCompareVectorsFixed = 0x24000000,
+  SVEIntCompareVectorsFMask = 0xFF200000,
+  SVEIntCompareVectorsMask = 0xFF20E010,
+  CMPHS_p_p_zz = SVEIntCompareVectorsFixed,
+  CMPHI_p_p_zz = SVEIntCompareVectorsFixed | 0x00000010,
+  CMPEQ_p_p_zw = SVEIntCompareVectorsFixed | 0x00002000,
+  CMPNE_p_p_zw = SVEIntCompareVectorsFixed | 0x00002010,
+  CMPGE_p_p_zw = SVEIntCompareVectorsFixed | 0x00004000,
+  CMPGT_p_p_zw = SVEIntCompareVectorsFixed | 0x00004010,
+  CMPLT_p_p_zw = SVEIntCompareVectorsFixed | 0x00006000,
+  CMPLE_p_p_zw = SVEIntCompareVectorsFixed | 0x00006010,
+  CMPGE_p_p_zz = SVEIntCompareVectorsFixed | 0x00008000,
+  CMPGT_p_p_zz = SVEIntCompareVectorsFixed | 0x00008010,
+  CMPEQ_p_p_zz = SVEIntCompareVectorsFixed | 0x0000A000,
+  CMPNE_p_p_zz = SVEIntCompareVectorsFixed | 0x0000A010,
+  CMPHS_p_p_zw = SVEIntCompareVectorsFixed | 0x0000C000,
+  CMPHI_p_p_zw = SVEIntCompareVectorsFixed | 0x0000C010,
+  CMPLO_p_p_zw = SVEIntCompareVectorsFixed | 0x0000E000,
+  CMPLS_p_p_zw = SVEIntCompareVectorsFixed | 0x0000E010
+};
+
+enum SVEIntConvertToFPOp {
+  SVEIntConvertToFPFixed = 0x6510A000,
+  SVEIntConvertToFPFMask = 0xFF38E000,
+  SVEIntConvertToFPMask = 0xFFFFE000,
+  SCVTF_z_p_z_h2fp16 = SVEIntConvertToFPFixed | 0x00420000,
+  UCVTF_z_p_z_h2fp16 = SVEIntConvertToFPFixed | 0x00430000,
+  SCVTF_z_p_z_w2fp16 = SVEIntConvertToFPFixed | 0x00440000,
+  UCVTF_z_p_z_w2fp16 = SVEIntConvertToFPFixed | 0x00450000,
+  SCVTF_z_p_z_x2fp16 = SVEIntConvertToFPFixed | 0x00460000,
+  UCVTF_z_p_z_x2fp16 = SVEIntConvertToFPFixed | 0x00470000,
+  SCVTF_z_p_z_w2s = SVEIntConvertToFPFixed | 0x00840000,
+  UCVTF_z_p_z_w2s = SVEIntConvertToFPFixed | 0x00850000,
+  SCVTF_z_p_z_w2d = SVEIntConvertToFPFixed | 0x00C00000,
+  UCVTF_z_p_z_w2d = SVEIntConvertToFPFixed | 0x00C10000,
+  SCVTF_z_p_z_x2s = SVEIntConvertToFPFixed | 0x00C40000,
+  UCVTF_z_p_z_x2s = SVEIntConvertToFPFixed | 0x00C50000,
+  SCVTF_z_p_z_x2d = SVEIntConvertToFPFixed | 0x00C60000,
+  UCVTF_z_p_z_x2d = SVEIntConvertToFPFixed | 0x00C70000
+};
+
+enum SVEIntDivideVectors_PredicatedOp {
+  SVEIntDivideVectors_PredicatedFixed = 0x04140000,
+  SVEIntDivideVectors_PredicatedFMask = 0xFF3CE000,
+  SVEIntDivideVectors_PredicatedMask = 0xFF3FE000,
+  SDIV_z_p_zz = SVEIntDivideVectors_PredicatedFixed,
+  UDIV_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00010000,
+  SDIVR_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00020000,
+  UDIVR_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00030000
+};
+
+enum SVEIntMinMaxDifference_PredicatedOp {
+  SVEIntMinMaxDifference_PredicatedFixed = 0x04080000,
+  SVEIntMinMaxDifference_PredicatedFMask = 0xFF38E000,
+  SVEIntMinMaxDifference_PredicatedMask = 0xFF3FE000,
+  SMAX_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed,
+  UMAX_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00010000,
+  SMIN_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00020000,
+  UMIN_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00030000,
+  SABD_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00040000,
+  UABD_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00050000
+};
+
+enum SVEIntMinMaxImm_UnpredicatedOp {
+  SVEIntMinMaxImm_UnpredicatedFixed = 0x2528C000,
+  SVEIntMinMaxImm_UnpredicatedFMask = 0xFF38C000,
+  SVEIntMinMaxImm_UnpredicatedMask = 0xFF3FE000,
+  SMAX_z_zi = SVEIntMinMaxImm_UnpredicatedFixed,
+  UMAX_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00010000,
+  SMIN_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00020000,
+  UMIN_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00030000
+};
+
+enum SVEIntMulAddPredicatedOp {
+  SVEIntMulAddPredicatedFixed = 0x04004000,
+  SVEIntMulAddPredicatedFMask = 0xFF204000,
+  SVEIntMulAddPredicatedMask = 0xFF20E000,
+  MLA_z_p_zzz = SVEIntMulAddPredicatedFixed,
+  MLS_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x00002000,
+  MAD_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x00008000,
+  MSB_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x0000A000
+};
+
+enum SVEIntMulAddUnpredicatedOp {
+  SVEIntMulAddUnpredicatedFixed = 0x44000000,
+  SVEIntMulAddUnpredicatedFMask = 0xFF208000,
+  SVEIntMulAddUnpredicatedMask = 0xFF20FC00,
+  SDOT_z_zzz = SVEIntMulAddUnpredicatedFixed,
+  UDOT_z_zzz = SVEIntMulAddUnpredicatedFixed | 0x00000400
+};
+
+enum SVEIntMulImm_UnpredicatedOp {
+  SVEIntMulImm_UnpredicatedFixed = 0x2530C000,
+  SVEIntMulImm_UnpredicatedFMask = 0xFF38C000,
+  SVEIntMulImm_UnpredicatedMask = 0xFF3FE000,
+  MUL_z_zi = SVEIntMulImm_UnpredicatedFixed
+};
+
+enum SVEIntMulVectors_PredicatedOp {
+  SVEIntMulVectors_PredicatedFixed = 0x04100000,
+  SVEIntMulVectors_PredicatedFMask = 0xFF3CE000,
+  SVEIntMulVectors_PredicatedMask = 0xFF3FE000,
+  MUL_z_p_zz = SVEIntMulVectors_PredicatedFixed,
+  SMULH_z_p_zz = SVEIntMulVectors_PredicatedFixed | 0x00020000,
+  UMULH_z_p_zz = SVEIntMulVectors_PredicatedFixed | 0x00030000
+};
+
+enum SVEMovprfxOp {
+  SVEMovprfxFixed = 0x04002000,
+  SVEMovprfxFMask = 0xFF20E000,
+  SVEMovprfxMask = 0xFF3EE000,
+  MOVPRFX_z_p_z = SVEMovprfxFixed | 0x00100000
+};
+
+enum SVEIntReductionOp {
+  SVEIntReductionFixed = 0x04002000,
+  SVEIntReductionFMask = 0xFF20E000,
+  SVEIntReductionMask = 0xFF3FE000,
+  SADDV_r_p_z = SVEIntReductionFixed,
+  UADDV_r_p_z = SVEIntReductionFixed | 0x00010000,
+  SMAXV_r_p_z = SVEIntReductionFixed | 0x00080000,
+  UMAXV_r_p_z = SVEIntReductionFixed | 0x00090000,
+  SMINV_r_p_z = SVEIntReductionFixed | 0x000A0000,
+  UMINV_r_p_z = SVEIntReductionFixed | 0x000B0000
+};
+
+enum SVEIntReductionLogicalOp {
+  SVEIntReductionLogicalFixed = 0x04182000,
+  SVEIntReductionLogicalFMask = 0xFF38E000,
+  SVEIntReductionLogicalMask = 0xFF3FE000,
+  ORV_r_p_z = SVEIntReductionLogicalFixed | 0x00180000,
+  EORV_r_p_z = SVEIntReductionLogicalFixed | 0x00190000,
+  ANDV_r_p_z = SVEIntReductionLogicalFixed | 0x001A0000
+};
+
+enum SVEIntUnaryArithmeticPredicatedOp {
+  SVEIntUnaryArithmeticPredicatedFixed = 0x0400A000,
+  SVEIntUnaryArithmeticPredicatedFMask = 0xFF20E000,
+  SVEIntUnaryArithmeticPredicatedMask = 0xFF3FE000,
+  SXTB_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00100000,
+  UXTB_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00110000,
+  SXTH_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00120000,
+  UXTH_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00130000,
+  SXTW_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00140000,
+  UXTW_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00150000,
+  ABS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00160000,
+  NEG_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00170000,
+  CLS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00180000,
+  CLZ_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00190000,
+  CNT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001A0000,
+  CNOT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001B0000,
+  FABS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001C0000,
+  FNEG_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001D0000,
+  NOT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001E0000
+};
+
+enum SVELoadAndBroadcastElementOp {
+  SVELoadAndBroadcastElementFixed = 0x84408000,
+  SVELoadAndBroadcastElementFMask = 0xFE408000,
+  SVELoadAndBroadcastElementMask = 0xFFC0E000,
+  LD1RB_z_p_bi_u8 = SVELoadAndBroadcastElementFixed,
+  LD1RB_z_p_bi_u16 = SVELoadAndBroadcastElementFixed | 0x00002000,
+  LD1RB_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x00004000,
+  LD1RB_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x00006000,
+  LD1RSW_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x00800000,
+  LD1RH_z_p_bi_u16 = SVELoadAndBroadcastElementFixed | 0x00802000,
+  LD1RH_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x00804000,
+  LD1RH_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x00806000,
+  LD1RSH_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x01000000,
+  LD1RSH_z_p_bi_s32 = SVELoadAndBroadcastElementFixed | 0x01002000,
+  LD1RW_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x01004000,
+  LD1RW_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x01006000,
+  LD1RSB_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x01800000,
+  LD1RSB_z_p_bi_s32 = SVELoadAndBroadcastElementFixed | 0x01802000,
+  LD1RSB_z_p_bi_s16 = SVELoadAndBroadcastElementFixed | 0x01804000,
+  LD1RD_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x01806000
+};
+
+enum SVELoadAndBroadcastQuadword_ScalarPlusImmOp {
+  SVELoadAndBroadcastQuadword_ScalarPlusImmFixed = 0xA4002000,
+  SVELoadAndBroadcastQuadword_ScalarPlusImmFMask = 0xFE10E000,
+  SVELoadAndBroadcastQuadword_ScalarPlusImmMask = 0xFFF0E000,
+  LD1RQB_z_p_bi_u8 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed,
+  LD1RQH_z_p_bi_u16 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x00800000,
+  LD1RQW_z_p_bi_u32 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x01000000,
+  LD1RQD_z_p_bi_u64 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVELoadAndBroadcastQuadword_ScalarPlusScalarOp {
+  SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed = 0xA4000000,
+  SVELoadAndBroadcastQuadword_ScalarPlusScalarFMask = 0xFE00E000,
+  SVELoadAndBroadcastQuadword_ScalarPlusScalarMask = 0xFFE0E000,
+  LD1RQB_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed,
+  LD1RQH_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x00800000,
+  LD1RQW_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x01000000,
+  LD1RQD_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVELoadMultipleStructures_ScalarPlusImmOp {
+  SVELoadMultipleStructures_ScalarPlusImmFixed = 0xA400E000,
+  SVELoadMultipleStructures_ScalarPlusImmFMask = 0xFE10E000,
+  SVELoadMultipleStructures_ScalarPlusImmMask = 0xFFF0E000,
+  LD2B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00200000,
+  LD3B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00400000,
+  LD4B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00600000,
+  LD2H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00A00000,
+  LD3H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00C00000,
+  LD4H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00E00000,
+  LD2W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01200000,
+  LD3W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01400000,
+  LD4W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01600000,
+  LD2D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01A00000,
+  LD3D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01C00000,
+  LD4D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVELoadMultipleStructures_ScalarPlusScalarOp {
+  SVELoadMultipleStructures_ScalarPlusScalarFixed = 0xA400C000,
+  SVELoadMultipleStructures_ScalarPlusScalarFMask = 0xFE00E000,
+  SVELoadMultipleStructures_ScalarPlusScalarMask = 0xFFE0E000,
+  LD2B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00200000,
+  LD3B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00400000,
+  LD4B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00600000,
+  LD2H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00A00000,
+  LD3H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00C00000,
+  LD4H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00E00000,
+  LD2W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01200000,
+  LD3W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01400000,
+  LD4W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01600000,
+  LD2D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01A00000,
+  LD3D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01C00000,
+  LD4D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVELoadPredicateRegisterOp {
+  SVELoadPredicateRegisterFixed = 0x85800000,
+  SVELoadPredicateRegisterFMask = 0xFFC0E010,
+  SVELoadPredicateRegisterMask = 0xFFC0E010,
+  LDR_p_bi = SVELoadPredicateRegisterFixed
+};
+
+enum SVELoadVectorRegisterOp {
+  SVELoadVectorRegisterFixed = 0x85804000,
+  SVELoadVectorRegisterFMask = 0xFFC0E000,
+  SVELoadVectorRegisterMask = 0xFFC0E000,
+  LDR_z_bi = SVELoadVectorRegisterFixed
+};
+
+enum SVEMulIndexOp {
+  SVEMulIndexFixed = 0x44200000,
+  SVEMulIndexFMask = 0xFF200000,
+  SVEMulIndexMask = 0xFFE0FC00,
+  SDOT_z_zzzi_s = SVEMulIndexFixed | 0x00800000,
+  UDOT_z_zzzi_s = SVEMulIndexFixed | 0x00800400,
+  SDOT_z_zzzi_d = SVEMulIndexFixed | 0x00C00000,
+  UDOT_z_zzzi_d = SVEMulIndexFixed | 0x00C00400
+};
+
+enum SVEPartitionBreakConditionOp {
+  SVEPartitionBreakConditionFixed = 0x25104000,
+  SVEPartitionBreakConditionFMask = 0xFF3FC200,
+  SVEPartitionBreakConditionMask = 0xFFFFC200,
+  BRKA_p_p_p = SVEPartitionBreakConditionFixed,
+  BRKAS_p_p_p_z = SVEPartitionBreakConditionFixed | 0x00400000,
+  BRKB_p_p_p = SVEPartitionBreakConditionFixed | 0x00800000,
+  BRKBS_p_p_p_z = SVEPartitionBreakConditionFixed | 0x00C00000
+};
+
+enum SVEPermutePredicateElementsOp {
+  SVEPermutePredicateElementsFixed = 0x05204000,
+  SVEPermutePredicateElementsFMask = 0xFF30E210,
+  SVEPermutePredicateElementsMask = 0xFF30FE10,
+  ZIP1_p_pp = SVEPermutePredicateElementsFixed,
+  ZIP2_p_pp = SVEPermutePredicateElementsFixed | 0x00000400,
+  UZP1_p_pp = SVEPermutePredicateElementsFixed | 0x00000800,
+  UZP2_p_pp = SVEPermutePredicateElementsFixed | 0x00000C00,
+  TRN1_p_pp = SVEPermutePredicateElementsFixed | 0x00001000,
+  TRN2_p_pp = SVEPermutePredicateElementsFixed | 0x00001400
+};
+
+enum SVEPermuteVectorExtractOp {
+  SVEPermuteVectorExtractFixed = 0x05200000,
+  SVEPermuteVectorExtractFMask = 0xFF20E000,
+  SVEPermuteVectorExtractMask = 0xFFE0E000,
+  EXT_z_zi_des = SVEPermuteVectorExtractFixed
+};
+
+enum SVEPermuteVectorInterleavingOp {
+  SVEPermuteVectorInterleavingFixed = 0x05206000,
+  SVEPermuteVectorInterleavingFMask = 0xFF20E000,
+  SVEPermuteVectorInterleavingMask = 0xFF20FC00,
+  ZIP1_z_zz = SVEPermuteVectorInterleavingFixed,
+  ZIP2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000400,
+  UZP1_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000800,
+  UZP2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000C00,
+  TRN1_z_zz = SVEPermuteVectorInterleavingFixed | 0x00001000,
+  TRN2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00001400
+};
+
+enum SVEPredicateCountOp {
+  SVEPredicateCountFixed = 0x25208000,
+  SVEPredicateCountFMask = 0xFF38C000,
+  SVEPredicateCountMask = 0xFF3FC200,
+  CNTP_r_p_p = SVEPredicateCountFixed
+};
+
+enum SVEPredicateFirstActiveOp {
+  SVEPredicateFirstActiveFixed = 0x2518C000,
+  SVEPredicateFirstActiveFMask = 0xFF3FFE10,
+  SVEPredicateFirstActiveMask = 0xFFFFFE10,
+  PFIRST_p_p_p = SVEPredicateFirstActiveFixed | 0x00400000
+};
+
+enum SVEPredicateInitializeOp {
+  SVEPredicateInitializeFixed = 0x2518E000,
+  SVEPredicateInitializeFMask = 0xFF3EFC10,
+  SVEPredicateInitializeMask = 0xFF3FFC10,
+  SVEPredicateInitializeSetFlagsBit = 0x00010000,
+  PTRUE_p_s = SVEPredicateInitializeFixed | 0x00000000,
+  PTRUES_p_s = SVEPredicateInitializeFixed | SVEPredicateInitializeSetFlagsBit
+};
+
+enum SVEPredicateLogicalOp {
+  SVEPredicateLogicalFixed = 0x25004000,
+  SVEPredicateLogicalFMask = 0xFF30C000,
+  SVEPredicateLogicalMask = 0xFFF0C210,
+  SVEPredicateLogicalSetFlagsBit = 0x00400000,
+  AND_p_p_pp_z = SVEPredicateLogicalFixed,
+  ANDS_p_p_pp_z = AND_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  BIC_p_p_pp_z = SVEPredicateLogicalFixed | 0x00000010,
+  BICS_p_p_pp_z = BIC_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  EOR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00000200,
+  EORS_p_p_pp_z = EOR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  ORR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800000,
+  ORRS_p_p_pp_z = ORR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  ORN_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800010,
+  ORNS_p_p_pp_z = ORN_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  NAND_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800210,
+  NANDS_p_p_pp_z = NAND_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  NOR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800200,
+  NORS_p_p_pp_z = NOR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  SEL_p_p_pp = SVEPredicateLogicalFixed | 0x00000210
+};
+
+enum SVEPredicateNextActiveOp {
+  SVEPredicateNextActiveFixed = 0x2519C400,
+  SVEPredicateNextActiveFMask = 0xFF3FFE10,
+  SVEPredicateNextActiveMask = 0xFF3FFE10,
+  PNEXT_p_p_p = SVEPredicateNextActiveFixed
+};
+
+enum SVEPredicateReadFromFFR_PredicatedOp {
+  SVEPredicateReadFromFFR_PredicatedFixed = 0x2518F000,
+  SVEPredicateReadFromFFR_PredicatedFMask = 0xFF3FFE10,
+  SVEPredicateReadFromFFR_PredicatedMask = 0xFFFFFE10,
+  RDFFR_p_p_f = SVEPredicateReadFromFFR_PredicatedFixed,
+  RDFFRS_p_p_f = SVEPredicateReadFromFFR_PredicatedFixed | 0x00400000
+};
+
+enum SVEPredicateReadFromFFR_UnpredicatedOp {
+  SVEPredicateReadFromFFR_UnpredicatedFixed = 0x2519F000,
+  SVEPredicateReadFromFFR_UnpredicatedFMask = 0xFF3FFFF0,
+  SVEPredicateReadFromFFR_UnpredicatedMask = 0xFFFFFFF0,
+  RDFFR_p_f = SVEPredicateReadFromFFR_UnpredicatedFixed
+};
+
+enum SVEPredicateTestOp {
+  SVEPredicateTestFixed = 0x2510C000,
+  SVEPredicateTestFMask = 0xFF3FC210,
+  SVEPredicateTestMask = 0xFFFFC21F,
+  PTEST_p_p = SVEPredicateTestFixed | 0x00400000
+};
+
+enum SVEPredicateZeroOp {
+  SVEPredicateZeroFixed = 0x2518E400,
+  SVEPredicateZeroFMask = 0xFF3FFFF0,
+  SVEPredicateZeroMask = 0xFFFFFFF0,
+  PFALSE_p = SVEPredicateZeroFixed
+};
+
+enum SVEPropagateBreakOp {
+  SVEPropagateBreakFixed = 0x2500C000,
+  SVEPropagateBreakFMask = 0xFF30C000,
+  SVEPropagateBreakMask = 0xFFF0C210,
+  BRKPA_p_p_pp = SVEPropagateBreakFixed,
+  BRKPB_p_p_pp = SVEPropagateBreakFixed | 0x00000010,
+  BRKPAS_p_p_pp = SVEPropagateBreakFixed | 0x00400000,
+  BRKPBS_p_p_pp = SVEPropagateBreakFixed | 0x00400010
+};
+
+enum SVEPropagateBreakToNextPartitionOp {
+  SVEPropagateBreakToNextPartitionFixed = 0x25184000,
+  SVEPropagateBreakToNextPartitionFMask = 0xFFBFC210,
+  SVEPropagateBreakToNextPartitionMask = 0xFFFFC210,
+  BRKN_p_p_pp = SVEPropagateBreakToNextPartitionFixed,
+  BRKNS_p_p_pp = SVEPropagateBreakToNextPartitionFixed | 0x00400000
+};
+
+enum SVEReversePredicateElementsOp {
+  SVEReversePredicateElementsFixed = 0x05344000,
+  SVEReversePredicateElementsFMask = 0xFF3FFE10,
+  SVEReversePredicateElementsMask = 0xFF3FFE10,
+  REV_p_p = SVEReversePredicateElementsFixed
+};
+
+enum SVEReverseVectorElementsOp {
+  SVEReverseVectorElementsFixed = 0x05383800,
+  SVEReverseVectorElementsFMask = 0xFF3FFC00,
+  SVEReverseVectorElementsMask = 0xFF3FFC00,
+  REV_z_z = SVEReverseVectorElementsFixed
+};
+
+enum SVEReverseWithinElementsOp {
+  SVEReverseWithinElementsFixed = 0x05248000,
+  SVEReverseWithinElementsFMask = 0xFF3CE000,
+  SVEReverseWithinElementsMask = 0xFF3FE000,
+  REVB_z_z = SVEReverseWithinElementsFixed,
+  REVH_z_z = SVEReverseWithinElementsFixed | 0x00010000,
+  REVW_z_z = SVEReverseWithinElementsFixed | 0x00020000,
+  RBIT_z_p_z = SVEReverseWithinElementsFixed | 0x00030000
+};
+
+enum SVESaturatingIncDecRegisterByElementCountOp {
+  SVESaturatingIncDecRegisterByElementCountFixed = 0x0420F000,
+  SVESaturatingIncDecRegisterByElementCountFMask = 0xFF20F000,
+  SVESaturatingIncDecRegisterByElementCountMask = 0xFFF0FC00,
+  SQINCB_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed,
+  UQINCB_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000400,
+  SQDECB_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000800,
+  UQDECB_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000C00,
+  SQINCB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100000,
+  UQINCB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100400,
+  SQDECB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100800,
+  UQDECB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100C00,
+  SQINCH_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400000,
+  UQINCH_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400400,
+  SQDECH_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400800,
+  UQDECH_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400C00,
+  SQINCH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500000,
+  UQINCH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500400,
+  SQDECH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500800,
+  UQDECH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500C00,
+  SQINCW_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800000,
+  UQINCW_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800400,
+  SQDECW_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800800,
+  UQDECW_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800C00,
+  SQINCW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900000,
+  UQINCW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900400,
+  SQDECW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900800,
+  UQDECW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900C00,
+  SQINCD_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00000,
+  UQINCD_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00400,
+  SQDECD_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00800,
+  UQDECD_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00C00,
+  SQINCD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00000,
+  UQINCD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00400,
+  SQDECD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00800,
+  UQDECD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00C00
+};
+
+enum SVESaturatingIncDecVectorByElementCountOp {
+  SVESaturatingIncDecVectorByElementCountFixed = 0x0420C000,
+  SVESaturatingIncDecVectorByElementCountFMask = 0xFF30F000,
+  SVESaturatingIncDecVectorByElementCountMask = 0xFFF0FC00,
+  SQINCH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400000,
+  UQINCH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400400,
+  SQDECH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400800,
+  UQDECH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400C00,
+  SQINCW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800000,
+  UQINCW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800400,
+  SQDECW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800800,
+  UQDECW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800C00,
+  SQINCD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00000,
+  UQINCD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00400,
+  SQDECD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00800,
+  UQDECD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00C00
+};
+
+enum SVEStackFrameAdjustmentOp {
+  SVEStackFrameAdjustmentFixed = 0x04205000,
+  SVEStackFrameAdjustmentFMask = 0xFFA0F800,
+  SVEStackFrameAdjustmentMask = 0xFFE0F800,
+  ADDVL_r_ri = SVEStackFrameAdjustmentFixed,
+  ADDPL_r_ri = SVEStackFrameAdjustmentFixed | 0x00400000
+};
+
+enum SVEStackFrameSizeOp {
+  SVEStackFrameSizeFixed = 0x04BF5000,
+  SVEStackFrameSizeFMask = 0xFFFFF800,
+  SVEStackFrameSizeMask = 0xFFFFF800,
+  RDVL_r_i = SVEStackFrameSizeFixed
+};
+
+enum SVEStoreMultipleStructures_ScalarPlusImmOp {
+  SVEStoreMultipleStructures_ScalarPlusImmFixed = 0xE410E000,
+  SVEStoreMultipleStructures_ScalarPlusImmFMask = 0xFE10E000,
+  SVEStoreMultipleStructures_ScalarPlusImmMask = 0xFFF0E000,
+  ST2B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00200000,
+  ST3B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00400000,
+  ST4B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00600000,
+  ST2H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00A00000,
+  ST3H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00C00000,
+  ST4H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00E00000,
+  ST2W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01200000,
+  ST3W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01400000,
+  ST4W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01600000,
+  ST2D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01A00000,
+  ST3D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01C00000,
+  ST4D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVEStoreMultipleStructures_ScalarPlusScalarOp {
+  SVEStoreMultipleStructures_ScalarPlusScalarFixed = 0xE4006000,
+  SVEStoreMultipleStructures_ScalarPlusScalarFMask = 0xFE00E000,
+  SVEStoreMultipleStructures_ScalarPlusScalarMask = 0xFFE0E000,
+  ST2B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00200000,
+  ST3B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00400000,
+  ST4B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00600000,
+  ST2H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00A00000,
+  ST3H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00C00000,
+  ST4H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00E00000,
+  ST2W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01200000,
+  ST3W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01400000,
+  ST4W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01600000,
+  ST2D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01A00000,
+  ST3D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01C00000,
+  ST4D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVEStorePredicateRegisterOp {
+  SVEStorePredicateRegisterFixed = 0xE5800000,
+  SVEStorePredicateRegisterFMask = 0xFFC0E010,
+  SVEStorePredicateRegisterMask = 0xFFC0E010,
+  STR_p_bi = SVEStorePredicateRegisterFixed
+};
+
+enum SVEStoreVectorRegisterOp {
+  SVEStoreVectorRegisterFixed = 0xE5804000,
+  SVEStoreVectorRegisterFMask = 0xFFC0E000,
+  SVEStoreVectorRegisterMask = 0xFFC0E000,
+  STR_z_bi = SVEStoreVectorRegisterFixed
+};
+
+enum SVETableLookupOp {
+  SVETableLookupFixed = 0x05203000,
+  SVETableLookupFMask = 0xFF20FC00,
+  SVETableLookupMask = 0xFF20FC00,
+  TBL_z_zz_1 = SVETableLookupFixed
+};
+
+enum SVEUnpackPredicateElementsOp {
+  SVEUnpackPredicateElementsFixed = 0x05304000,
+  SVEUnpackPredicateElementsFMask = 0xFFFEFE10,
+  SVEUnpackPredicateElementsMask = 0xFFFFFE10,
+  PUNPKLO_p_p = SVEUnpackPredicateElementsFixed,
+  PUNPKHI_p_p = SVEUnpackPredicateElementsFixed | 0x00010000
+};
+
+enum SVEUnpackVectorElementsOp {
+  SVEUnpackVectorElementsFixed = 0x05303800,
+  SVEUnpackVectorElementsFMask = 0xFF3CFC00,
+  SVEUnpackVectorElementsMask = 0xFF3FFC00,
+  SUNPKLO_z_z = SVEUnpackVectorElementsFixed,
+  SUNPKHI_z_z = SVEUnpackVectorElementsFixed | 0x00010000,
+  UUNPKLO_z_z = SVEUnpackVectorElementsFixed | 0x00020000,
+  UUNPKHI_z_z = SVEUnpackVectorElementsFixed | 0x00030000
+};
+
+enum SVEVectorSelectOp {
+  SVEVectorSelectFixed = 0x0520C000,
+  SVEVectorSelectFMask = 0xFF20C000,
+  SVEVectorSelectMask = 0xFF20C000,
+  SEL_z_p_zz = SVEVectorSelectFixed
+};
+
+enum SVEVectorSplice_DestructiveOp {
+  SVEVectorSplice_DestructiveFixed = 0x052C8000,
+  SVEVectorSplice_DestructiveFMask = 0xFF3FE000,
+  SVEVectorSplice_DestructiveMask = 0xFF3FE000,
+  SPLICE_z_p_zz_des = SVEVectorSplice_DestructiveFixed
+};
+
 enum ReservedOp {
   ReservedFixed = 0x00000000,
   ReservedFMask = 0x1E000000,
   ReservedMask = 0xFFFF0000,
-  UDF = ReservedFixed | 0x00000000
 };
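The Fixed/FMask/Mask triples drive a two-step match: (instr & FMask) == Fixed decides whether a word belongs to a group at all, and instr & Mask, compared against the named values, identifies the individual instruction. A minimal illustration of that convention, using the SVEBitwiseLogicalUnpredicatedOp constants above (the helper itself is hypothetical, not VIXL's actual decoder):

#include <cstdint>

// Hypothetical classifier; the constants are copied from the enum above,
// the function is just a sketch of the Fixed/FMask/Mask convention.
const char* ClassifyLogicalUnpredicated(uint32_t instr) {
  if ((instr & 0xFF20E000) != 0x04202000) return "not in this group";  // FMask/Fixed.
  switch (instr & 0xFFE0FC00) {  // Mask isolates the per-instruction bits.
    case 0x04203000: return "and";  // AND_z_zz = Fixed | 0x00001000
    case 0x04603000: return "orr";  // ORR_z_zz = Fixed | 0x00401000
    case 0x04A03000: return "eor";  // EOR_z_zz = Fixed | 0x00801000
    case 0x04E03000: return "bic";  // BIC_z_zz = Fixed | 0x00C01000
    default: return "unallocated";
  }
}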
diff --git a/src/aarch64/cpu-aarch64.cc b/src/aarch64/cpu-aarch64.cc
index f5e4fca5..a31e010d 100644
--- a/src/aarch64/cpu-aarch64.cc
+++ b/src/aarch64/cpu-aarch64.cc
@@ -39,10 +39,15 @@ namespace aarch64 {
 const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
 const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
+const IDRegister::Field AA64PFR0::kRAS(28);
 const IDRegister::Field AA64PFR0::kSVE(32);
 const IDRegister::Field AA64PFR0::kDIT(48);
+const IDRegister::Field AA64PFR0::kCSV2(56);
+const IDRegister::Field AA64PFR0::kCSV3(60);

 const IDRegister::Field AA64PFR1::kBT(0);
+const IDRegister::Field AA64PFR1::kSSBS(4);
+const IDRegister::Field AA64PFR1::kMTE(8);

 const IDRegister::Field AA64ISAR0::kAES(4);
 const IDRegister::Field AA64ISAR0::kSHA1(8);
@@ -56,6 +61,7 @@ const IDRegister::Field AA64ISAR0::kSM4(40);
 const IDRegister::Field AA64ISAR0::kDP(44);
 const IDRegister::Field AA64ISAR0::kFHM(48);
 const IDRegister::Field AA64ISAR0::kTS(52);
+const IDRegister::Field AA64ISAR0::kRNDR(60);

 const IDRegister::Field AA64ISAR1::kDPB(0);
 const IDRegister::Field AA64ISAR1::kAPA(4);
@@ -68,23 +74,41 @@ const IDRegister::Field AA64ISAR1::kGPI(28);
 const IDRegister::Field AA64ISAR1::kFRINTTS(32);
 const IDRegister::Field AA64ISAR1::kSB(36);
 const IDRegister::Field AA64ISAR1::kSPECRES(40);
+const IDRegister::Field AA64ISAR1::kBF16(44);
+const IDRegister::Field AA64ISAR1::kDGH(48);
+const IDRegister::Field AA64ISAR1::kI8MM(52);

 const IDRegister::Field AA64MMFR1::kLO(16);

+const IDRegister::Field AA64MMFR2::kAT(32);
+
+const IDRegister::Field AA64ZFR0::kBF16(20);
+const IDRegister::Field AA64ZFR0::kI8MM(44);
+const IDRegister::Field AA64ZFR0::kF32MM(52);
+const IDRegister::Field AA64ZFR0::kF64MM(56);
+
 CPUFeatures AA64PFR0::GetCPUFeatures() const {
   CPUFeatures f;
   if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
   if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
   if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
   if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
+  if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
   if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
   if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
+  if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
+  if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
+  if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
   return f;
 }

 CPUFeatures AA64PFR1::GetCPUFeatures() const {
   CPUFeatures f;
   if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
+  if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
+  if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
+  if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
+  if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
   return f;
 }
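The Field::kSigned fields explain the ">= 0" tests above: Arm ID-register fields are four bits wide, and signed fields use 0xF (that is, -1) to mean "not implemented". A sketch of the extraction, where the four-bit width and sign convention are assumptions based on the Arm ID scheme rather than something stated in this diff:

#include <cstdint>

// Sketch of signed/unsigned ID field extraction.
int GetIdField(uint64_t reg, int lsb, bool is_signed) {
  int bits = static_cast<int>((reg >> lsb) & 0xF);
  if (is_signed && ((bits & 0x8) != 0)) bits -= 16;  // Sign-extend from 4 bits.
  return bits;
}
// For AA64PFR0.FP (lsb 16, signed): -1 means no FP, 0 means FP, 1 means
// FP plus FP16, which is why the code tests Get(kFP) >= 0 rather than >= 1.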
>= 1) f.Combine(CPUFeatures::kMTEInstructions); + if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE); return f; } @@ -105,20 +129,38 @@ CPUFeatures AA64ISAR0::GetCPUFeatures() const { if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM); if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM); if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag); + if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG); return f; } CPUFeatures AA64ISAR1::GetCPUFeatures() const { CPUFeatures f; if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP); + if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP); if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT); if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma); if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc); if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm); if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt); + if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB); + if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES); + if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16); + if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH); + if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM); + + // Only one of these fields should be non-zero, but they have the same + // encodings, so merge the logic. + int apx = std::max(Get(kAPI), Get(kAPA)); + if (apx >= 1) { + f.Combine(CPUFeatures::kPAuth); + // APA (rather than API) indicates QARMA. + if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA); + if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC); + if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2); + if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC); + if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined); + } - if (Get(kAPI) >= 1) f.Combine(CPUFeatures::kPAuth); - if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuth, CPUFeatures::kPAuthQARMA); if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric); if (Get(kGPA) >= 1) { f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA); @@ -132,6 +174,23 @@ CPUFeatures AA64MMFR1::GetCPUFeatures() const { return f; } +CPUFeatures AA64MMFR2::GetCPUFeatures() const { + CPUFeatures f; + if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT); + return f; +} + +CPUFeatures AA64ZFR0::GetCPUFeatures() const { + // This register is only available with SVE, but reads-as-zero in its absence, + // so it's always safe to read it. + CPUFeatures f; + if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM); + if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM); + if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM); + if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16); + return f; +} + int IDRegister::Get(IDRegister::Field field) const { int msb = field.GetMsb(); int lsb = field.GetLsb(); @@ -149,7 +208,8 @@ int IDRegister::Get(IDRegister::Field field) const { CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() { CPUFeatures f; -#define VIXL_COMBINE_ID_REG(NAME) f.Combine(Read##NAME().GetCPUFeatures()); +#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \ + f.Combine(Read##NAME().GetCPUFeatures()); VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG) #undef VIXL_COMBINE_ID_REG return f; @@ -163,49 +223,73 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather // than explicit bits, but explicit bits allow us to identify features that // the toolchain doesn't know about. - static const CPUFeatures::Feature kFeatureBits[] = { - // Bits 0-7 - CPUFeatures::kFP, - CPUFeatures::kNEON, - CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track. 
- CPUFeatures::kAES, - CPUFeatures::kPmull1Q, - CPUFeatures::kSHA1, - CPUFeatures::kSHA2, - CPUFeatures::kCRC32, - // Bits 8-15 - CPUFeatures::kAtomics, - CPUFeatures::kFPHalf, - CPUFeatures::kNEONHalf, - CPUFeatures::kIDRegisterEmulation, - CPUFeatures::kRDM, - CPUFeatures::kJSCVT, - CPUFeatures::kFcma, - CPUFeatures::kRCpc, - // Bits 16-23 - CPUFeatures::kDCPoP, - CPUFeatures::kSHA3, - CPUFeatures::kSM3, - CPUFeatures::kSM4, - CPUFeatures::kDotProduct, - CPUFeatures::kSHA512, - CPUFeatures::kSVE, - CPUFeatures::kFHM, - // Bits 24-27 - CPUFeatures::kDIT, - CPUFeatures::kUSCAT, - CPUFeatures::kRCpcImm, - CPUFeatures::kFlagM - // Bits 28-31 are unassigned. - }; - static const size_t kFeatureBitCount = - sizeof(kFeatureBits) / sizeof(kFeatureBits[0]); - - unsigned long auxv = getauxval(AT_HWCAP); // NOLINT(runtime/int) - - VIXL_STATIC_ASSERT(kFeatureBitCount < (sizeof(auxv) * kBitsPerByte)); - for (size_t i = 0; i < kFeatureBitCount; i++) { - if (auxv & (1UL << i)) features.Combine(kFeatureBits[i]); + static const CPUFeatures::Feature kFeatureBits[] = + {// Bits 0-7 + CPUFeatures::kFP, + CPUFeatures::kNEON, + CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track. + CPUFeatures::kAES, + CPUFeatures::kPmull1Q, + CPUFeatures::kSHA1, + CPUFeatures::kSHA2, + CPUFeatures::kCRC32, + // Bits 8-15 + CPUFeatures::kAtomics, + CPUFeatures::kFPHalf, + CPUFeatures::kNEONHalf, + CPUFeatures::kIDRegisterEmulation, + CPUFeatures::kRDM, + CPUFeatures::kJSCVT, + CPUFeatures::kFcma, + CPUFeatures::kRCpc, + // Bits 16-23 + CPUFeatures::kDCPoP, + CPUFeatures::kSHA3, + CPUFeatures::kSM3, + CPUFeatures::kSM4, + CPUFeatures::kDotProduct, + CPUFeatures::kSHA512, + CPUFeatures::kSVE, + CPUFeatures::kFHM, + // Bits 24-31 + CPUFeatures::kDIT, + CPUFeatures::kUSCAT, + CPUFeatures::kRCpcImm, + CPUFeatures::kFlagM, + CPUFeatures::kSSBSControl, + CPUFeatures::kSB, + CPUFeatures::kPAuth, + CPUFeatures::kPAuthGeneric, + // Bits 32-39 + CPUFeatures::kDCCVADP, + CPUFeatures::kNone, // "sve2" + CPUFeatures::kNone, // "sveaes" + CPUFeatures::kNone, // "svepmull" + CPUFeatures::kNone, // "svebitperm" + CPUFeatures::kNone, // "svesha3" + CPUFeatures::kNone, // "svesm4" + CPUFeatures::kFrintToFixedSizedInt, + // Bits 40-47 + CPUFeatures::kSVEI8MM, + CPUFeatures::kSVEF32MM, + CPUFeatures::kSVEF64MM, + CPUFeatures::kSVEBF16, + CPUFeatures::kI8MM, + CPUFeatures::kBF16, + CPUFeatures::kDGH, + CPUFeatures::kRNG, + // Bits 48+ + CPUFeatures::kBTI}; + + uint64_t hwcap_low32 = getauxval(AT_HWCAP); + uint64_t hwcap_high32 = getauxval(AT_HWCAP2); + VIXL_ASSERT(IsUint32(hwcap_low32)); + VIXL_ASSERT(IsUint32(hwcap_high32)); + uint64_t hwcap = hwcap_low32 | (hwcap_high32 << 32); + + VIXL_STATIC_ASSERT(ArrayLength(kFeatureBits) < 64); + for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) { + if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]); } #endif // VIXL_USE_LINUX_HWCAP @@ -218,17 +302,17 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( #ifdef __aarch64__ -#define VIXL_READ_ID_REG(NAME) \ - NAME CPU::Read##NAME() { \ - uint64_t value = 0; \ - __asm__("mrs %0, ID_" #NAME "_EL1" : "=r"(value)); \ - return NAME(value); \ +#define VIXL_READ_ID_REG(NAME, MRS_ARG) \ + NAME CPU::Read##NAME() { \ + uint64_t value = 0; \ + __asm__("mrs %0, " MRS_ARG : "=r"(value)); \ + return NAME(value); \ } #else // __aarch64__ -#define VIXL_READ_ID_REG(NAME) \ - NAME CPU::Read##NAME() { \ - /* TODO: Use VIXL_UNREACHABLE once it works in release builds. 
*/ \ - VIXL_ABORT(); \ +#define VIXL_READ_ID_REG(NAME, MRS_ARG) \ + NAME CPU::Read##NAME() { \ + VIXL_UNREACHABLE(); \ + return NAME(0); \ } #endif // __aarch64__ @@ -282,6 +366,27 @@ uint32_t CPU::GetCacheType() { } +// Query the SVE vector length. This requires CPUFeatures::kSVE. +int CPU::ReadSVEVectorLengthInBits() { +#ifdef __aarch64__ + uint64_t vl; + // To support compilers that don't understand `rdvl`, encode the value + // directly and move it manually. + __asm__( + " .word 0x04bf5100\n" // rdvl x0, #8 + " mov %[vl], x0\n" + : [vl] "=r"(vl) + : + : "x0"); + VIXL_ASSERT(vl <= INT_MAX); + return static_cast<int>(vl); +#else + VIXL_UNREACHABLE(); + return 0; +#endif +} + + void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) { #ifdef __aarch64__ // Implement the cache synchronisation for all targets where AArch64 is the diff --git a/src/aarch64/cpu-aarch64.h b/src/aarch64/cpu-aarch64.h index d2b2ee87..2bf1e60f 100644 --- a/src/aarch64/cpu-aarch64.h +++ b/src/aarch64/cpu-aarch64.h @@ -56,7 +56,11 @@ class IDRegister { public: enum Type { kUnsigned, kSigned }; - explicit Field(int lsb, Type type = kUnsigned) : lsb_(lsb), type_(type) {} + // This needs to be constexpr so that fields have "constant initialisation". + // This avoids initialisation order problems when these values are used to + // (dynamically) initialise static variables, etc. + explicit constexpr Field(int lsb, Type type = kUnsigned) + : lsb_(lsb), type_(type) {} static const int kMaxWidthInBits = 4; @@ -92,8 +96,11 @@ class AA64PFR0 : public IDRegister { private: static const Field kFP; static const Field kAdvSIMD; + static const Field kRAS; static const Field kSVE; static const Field kDIT; + static const Field kCSV2; + static const Field kCSV3; }; class AA64PFR1 : public IDRegister { @@ -104,6 +111,8 @@ class AA64PFR1 : public IDRegister { private: static const Field kBT; + static const Field kSSBS; + static const Field kMTE; }; class AA64ISAR0 : public IDRegister { @@ -125,6 +134,7 @@ class AA64ISAR0 : public IDRegister { static const Field kDP; static const Field kFHM; static const Field kTS; + static const Field kRNDR; }; class AA64ISAR1 : public IDRegister { @@ -145,6 +155,9 @@ class AA64ISAR1 : public IDRegister { static const Field kFRINTTS; static const Field kSB; static const Field kSPECRES; + static const Field kBF16; + static const Field kDGH; + static const Field kI8MM; }; class AA64MMFR1 : public IDRegister { @@ -157,6 +170,29 @@ class AA64MMFR1 : public IDRegister { static const Field kLO; }; +class AA64MMFR2 : public IDRegister { + public: + explicit AA64MMFR2(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kAT; +}; + +class AA64ZFR0 : public IDRegister { + public: + explicit AA64ZFR0(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kBF16; + static const Field kI8MM; + static const Field kF32MM; + static const Field kF64MM; +}; + class CPU { public: // Initialise CPU support. @@ -184,6 +220,9 @@ class CPU { CPUFeatures::QueryIDRegistersOption option = CPUFeatures::kQueryIDRegistersIfAvailable); + // Query the SVE vector length. This requires CPUFeatures::kSVE. + static int ReadSVEVectorLengthInBits(); + // Handle tagged pointers. 
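// [Editor's note, not part of the patch.] The tag handled by SetPointerTag
// below is the pointer's top byte, as used by AArch64 top-byte-ignore (TBI)
// addressing: for example, SetPointerTag(ptr, 0x2a) would be expected to
// return ptr with bits 63:56 replaced by 0x2a. The exact field placement is
// assumed from VIXL's usual address-tag handling; the function body itself
// is elided from this hunk.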
template <typename T> static T SetPointerTag(T pointer, uint64_t tag) { @@ -211,14 +250,18 @@ class CPU { } private: -#define VIXL_AARCH64_ID_REG_LIST(V) \ - V(AA64PFR0) \ - V(AA64PFR1) \ - V(AA64ISAR0) \ - V(AA64ISAR1) \ - V(AA64MMFR1) - -#define VIXL_READ_ID_REG(NAME) static NAME Read##NAME(); +#define VIXL_AARCH64_ID_REG_LIST(V) \ + V(AA64PFR0, "ID_AA64PFR0_EL1") \ + V(AA64PFR1, "ID_AA64PFR1_EL1") \ + V(AA64ISAR0, "ID_AA64ISAR0_EL1") \ + V(AA64ISAR1, "ID_AA64ISAR1_EL1") \ + V(AA64MMFR1, "ID_AA64MMFR1_EL1") \ + /* These registers are RES0 in the baseline Armv8.0. We can always safely */ \ + /* read them, but some compilers don't accept the symbolic names. */ \ + V(AA64MMFR2, "S3_0_C0_C7_2") \ + V(AA64ZFR0, "S3_0_C0_C4_4") + +#define VIXL_READ_ID_REG(NAME, MRS_ARG) static NAME Read##NAME(); // On native AArch64 platforms, read the named CPU ID registers. These require // CPUFeatures::kIDRegisterEmulation, and should not be called on non-AArch64 // platforms. diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc index 474803a1..abe63d39 100644 --- a/src/aarch64/cpu-features-auditor-aarch64.cc +++ b/src/aarch64/cpu-features-auditor-aarch64.cc @@ -870,7 +870,6 @@ void CPUFeaturesAuditor::VisitNEONModifiedImmediate(const Instruction* instr) { scope.Record(CPUFeatures::kFP); if (instr->ExtractBit(11)) scope.Record(CPUFeatures::kNEONHalf); } - USE(instr); } void CPUFeaturesAuditor::VisitNEONPerm(const Instruction* instr) { @@ -1068,6 +1067,165 @@ void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) { USE(instr); } +// Most SVE visitors require only SVE. +#define VIXL_SIMPLE_SVE_VISITOR_LIST(V) \ + V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitGatherLoad_VectorPlusImm) \ + V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_VectorPlusImm) \ + V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitScatterStore_VectorPlusImm) \ + V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_VectorPlusImm) \ + V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_VectorPlusImm) \ + V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitScatterStore_VectorPlusImm) \ + V(SVEAddressGeneration) \ + V(SVEBitwiseLogicalUnpredicated) \ + V(SVEBitwiseShiftUnpredicated) \ + V(SVEFFRInitialise) \ + V(SVEFFRWriteFromPredicate) \ + V(SVEFPAccumulatingReduction) \ + V(SVEFPArithmeticUnpredicated) \ + V(SVEFPCompareVectors) \ + V(SVEFPCompareWithZero) \ + V(SVEFPComplexAddition) \ + V(SVEFPComplexMulAdd) \ + V(SVEFPComplexMulAddIndex) \ + V(SVEFPFastReduction) \ + V(SVEFPMulIndex) \ + V(SVEFPMulAdd) \ + V(SVEFPMulAddIndex) \ + V(SVEFPUnaryOpUnpredicated) \ + V(SVEIncDecByPredicateCount) \ + V(SVEIndexGeneration) \ + V(SVEIntArithmeticUnpredicated) \ + V(SVEIntCompareSignedImm) \ + V(SVEIntCompareUnsignedImm) \ +
V(SVEIntCompareVectors) \ + V(SVEIntMulAddPredicated) \ + V(SVEIntMulAddUnpredicated) \ + V(SVEIntReduction) \ + V(SVEIntUnaryArithmeticPredicated) \ + V(SVEMovprfx) \ + V(SVEMulIndex) \ + V(SVEPermuteVectorExtract) \ + V(SVEPermuteVectorInterleaving) \ + V(SVEPredicateCount) \ + V(SVEPredicateLogical) \ + V(SVEPropagateBreak) \ + V(SVEStackFrameAdjustment) \ + V(SVEStackFrameSize) \ + V(SVEVectorSelect) \ + V(SVEBitwiseLogical_Predicated) \ + V(SVEBitwiseLogicalWithImm_Unpredicated) \ + V(SVEBitwiseShiftByImm_Predicated) \ + V(SVEBitwiseShiftByVector_Predicated) \ + V(SVEBitwiseShiftByWideElements_Predicated) \ + V(SVEBroadcastBitmaskImm) \ + V(SVEBroadcastFPImm_Unpredicated) \ + V(SVEBroadcastGeneralRegister) \ + V(SVEBroadcastIndexElement) \ + V(SVEBroadcastIntImm_Unpredicated) \ + V(SVECompressActiveElements) \ + V(SVEConditionallyBroadcastElementToVector) \ + V(SVEConditionallyExtractElementToSIMDFPScalar) \ + V(SVEConditionallyExtractElementToGeneralRegister) \ + V(SVEConditionallyTerminateScalars) \ + V(SVEConstructivePrefix_Unpredicated) \ + V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \ + V(SVEContiguousLoad_ScalarPlusImm) \ + V(SVEContiguousLoad_ScalarPlusScalar) \ + V(SVEContiguousNonFaultLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \ + V(SVEContiguousNonTemporalStore_ScalarPlusImm) \ + V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \ + V(SVEContiguousPrefetch_ScalarPlusImm) \ + V(SVEContiguousPrefetch_ScalarPlusScalar) \ + V(SVEContiguousStore_ScalarPlusImm) \ + V(SVEContiguousStore_ScalarPlusScalar) \ + V(SVECopySIMDFPScalarRegisterToVector_Predicated) \ + V(SVECopyFPImm_Predicated) \ + V(SVECopyGeneralRegisterToVector_Predicated) \ + V(SVECopyIntImm_Predicated) \ + V(SVEElementCount) \ + V(SVEExtractElementToSIMDFPScalarRegister) \ + V(SVEExtractElementToGeneralRegister) \ + V(SVEFPArithmetic_Predicated) \ + V(SVEFPArithmeticWithImm_Predicated) \ + V(SVEFPConvertPrecision) \ + V(SVEFPConvertToInt) \ + V(SVEFPExponentialAccelerator) \ + V(SVEFPRoundToIntegralValue) \ + V(SVEFPTrigMulAddCoefficient) \ + V(SVEFPTrigSelectCoefficient) \ + V(SVEFPUnaryOp) \ + V(SVEIncDecRegisterByElementCount) \ + V(SVEIncDecVectorByElementCount) \ + V(SVEInsertSIMDFPScalarRegister) \ + V(SVEInsertGeneralRegister) \ + V(SVEIntAddSubtractImm_Unpredicated) \ + V(SVEIntAddSubtractVectors_Predicated) \ + V(SVEIntCompareScalarCountAndLimit) \ + V(SVEIntConvertToFP) \ + V(SVEIntDivideVectors_Predicated) \ + V(SVEIntMinMaxImm_Unpredicated) \ + V(SVEIntMinMaxDifference_Predicated) \ + V(SVEIntMulImm_Unpredicated) \ + V(SVEIntMulVectors_Predicated) \ + V(SVELoadAndBroadcastElement) \ + V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \ + V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \ + V(SVELoadMultipleStructures_ScalarPlusImm) \ + V(SVELoadMultipleStructures_ScalarPlusScalar) \ + V(SVELoadPredicateRegister) \ + V(SVELoadVectorRegister) \ + V(SVEPartitionBreakCondition) \ + V(SVEPermutePredicateElements) \ + V(SVEPredicateFirstActive) \ + V(SVEPredicateInitialize) \ + V(SVEPredicateNextActive) \ + V(SVEPredicateReadFromFFR_Predicated) \ + V(SVEPredicateReadFromFFR_Unpredicated) \ + V(SVEPredicateTest) \ + V(SVEPredicateZero) \ + V(SVEPropagateBreakToNextPartition) \ + V(SVEReversePredicateElements) \ + V(SVEReverseVectorElements) \ + V(SVEReverseWithinElements) \ + V(SVESaturatingIncDecRegisterByElementCount) \ + V(SVESaturatingIncDecVectorByElementCount) \ + V(SVEStoreMultipleStructures_ScalarPlusImm) \ + 
V(SVEStoreMultipleStructures_ScalarPlusScalar) \ + V(SVEStorePredicateRegister) \ + V(SVEStoreVectorRegister) \ + V(SVETableLookup) \ + V(SVEUnpackPredicateElements) \ + V(SVEUnpackVectorElements) \ + V(SVEVectorSplice_Destructive) + +#define VIXL_DEFINE_SIMPLE_SVE_VISITOR(NAME) \ + void CPUFeaturesAuditor::Visit##NAME(const Instruction* instr) { \ + RecordInstructionFeaturesScope scope(this); \ + scope.Record(CPUFeatures::kSVE); \ + USE(instr); \ + } +VIXL_SIMPLE_SVE_VISITOR_LIST(VIXL_DEFINE_SIMPLE_SVE_VISITOR) +#undef VIXL_DEFINE_SIMPLE_SVE_VISITOR +#undef VIXL_SIMPLE_SVE_VISITOR_LIST + void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) { RecordInstructionFeaturesScope scope(this); if (instr->Mask(SystemHintFMask) == SystemHintFixed) { diff --git a/src/aarch64/decoder-aarch64.cc b/src/aarch64/decoder-aarch64.cc index ce1f33fb..c6859bbc 100644 --- a/src/aarch64/decoder-aarch64.cc +++ b/src/aarch64/decoder-aarch64.cc @@ -182,22 +182,45 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask) { case M: \ bit_extract_fn = &Instruction::ExtractBits<M>; \ break; + INSTANTIATE_TEMPLATE(0x000001e0); + INSTANTIATE_TEMPLATE(0x00000400); INSTANTIATE_TEMPLATE(0x00000800); INSTANTIATE_TEMPLATE(0x00000c00); + INSTANTIATE_TEMPLATE(0x00001000); + INSTANTIATE_TEMPLATE(0x00001800); INSTANTIATE_TEMPLATE(0x00001c00); INSTANTIATE_TEMPLATE(0x00004000); INSTANTIATE_TEMPLATE(0x00008000); INSTANTIATE_TEMPLATE(0x0000f000); INSTANTIATE_TEMPLATE(0x0000fc00); + INSTANTIATE_TEMPLATE(0x00060010); + INSTANTIATE_TEMPLATE(0x00093e00); + INSTANTIATE_TEMPLATE(0x000c1000); + INSTANTIATE_TEMPLATE(0x00100000); + INSTANTIATE_TEMPLATE(0x00101800); + INSTANTIATE_TEMPLATE(0x00140000); + INSTANTIATE_TEMPLATE(0x00180000); + INSTANTIATE_TEMPLATE(0x00181000); + INSTANTIATE_TEMPLATE(0x00190000); + INSTANTIATE_TEMPLATE(0x00191400); + INSTANTIATE_TEMPLATE(0x001c0000); + INSTANTIATE_TEMPLATE(0x001c1800); INSTANTIATE_TEMPLATE(0x001f0000); INSTANTIATE_TEMPLATE(0x0020fc00); INSTANTIATE_TEMPLATE(0x0038f000); INSTANTIATE_TEMPLATE(0x00400000); + INSTANTIATE_TEMPLATE(0x00400010); INSTANTIATE_TEMPLATE(0x0040f000); + INSTANTIATE_TEMPLATE(0x00500000); INSTANTIATE_TEMPLATE(0x00800000); + INSTANTIATE_TEMPLATE(0x00800010); + INSTANTIATE_TEMPLATE(0x00801800); + INSTANTIATE_TEMPLATE(0x009f0000); INSTANTIATE_TEMPLATE(0x00c00000); + INSTANTIATE_TEMPLATE(0x00c00010); INSTANTIATE_TEMPLATE(0x00cf8000); INSTANTIATE_TEMPLATE(0x00db0000); + INSTANTIATE_TEMPLATE(0x00dc0000); INSTANTIATE_TEMPLATE(0x00e00003); INSTANTIATE_TEMPLATE(0x00f80400); INSTANTIATE_TEMPLATE(0x01e00000); @@ -233,6 +256,7 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask) { INSTANTIATE_TEMPLATE(0xc4400000); INSTANTIATE_TEMPLATE(0xc4c00000); INSTANTIATE_TEMPLATE(0xe0400000); + INSTANTIATE_TEMPLATE(0xe120e000); INSTANTIATE_TEMPLATE(0xe3c00000); INSTANTIATE_TEMPLATE(0xf1200000); #undef INSTANTIATE_TEMPLATE @@ -259,20 +283,44 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask, uint32_t value) { instantiated = true; \ } INSTANTIATE_TEMPLATE(0x0000001c, 0x00000000); + INSTANTIATE_TEMPLATE(0x00000210, 0x00000000); + INSTANTIATE_TEMPLATE(0x000003c0, 0x00000000); + INSTANTIATE_TEMPLATE(0x00001c00, 0x00000000); + INSTANTIATE_TEMPLATE(0x00001c0f, 0x00000000); INSTANTIATE_TEMPLATE(0x00003000, 0x00000000); INSTANTIATE_TEMPLATE(0x00007800, 0x00000000); + INSTANTIATE_TEMPLATE(0x0000e000, 0x0000a000); INSTANTIATE_TEMPLATE(0x0000f000, 0x00000000); + INSTANTIATE_TEMPLATE(0x00030400, 0x00000000); INSTANTIATE_TEMPLATE(0x0003801f, 0x0000000d); + 
INSTANTIATE_TEMPLATE(0x00060210, 0x00000000); + INSTANTIATE_TEMPLATE(0x00060810, 0x00000000); + INSTANTIATE_TEMPLATE(0x00060a10, 0x00000000); + INSTANTIATE_TEMPLATE(0x00060bf0, 0x00000000); + INSTANTIATE_TEMPLATE(0x00061e10, 0x00000000); + INSTANTIATE_TEMPLATE(0x00061e10, 0x00000400); + INSTANTIATE_TEMPLATE(0x00070200, 0x00000000); + INSTANTIATE_TEMPLATE(0x000b1e10, 0x00000000); INSTANTIATE_TEMPLATE(0x000f0000, 0x00000000); + INSTANTIATE_TEMPLATE(0x00130e1f, 0x00000000); + INSTANTIATE_TEMPLATE(0x00130fff, 0x00000000); + INSTANTIATE_TEMPLATE(0x00180000, 0x00000000); + INSTANTIATE_TEMPLATE(0x00180000, 0x00100000); + INSTANTIATE_TEMPLATE(0x001e0000, 0x00000000); + INSTANTIATE_TEMPLATE(0x001f0000, 0x00000000); INSTANTIATE_TEMPLATE(0x001f0000, 0x001f0000); INSTANTIATE_TEMPLATE(0x0038e000, 0x00000000); INSTANTIATE_TEMPLATE(0x0039e000, 0x00002000); INSTANTIATE_TEMPLATE(0x003ae000, 0x00002000); INSTANTIATE_TEMPLATE(0x003ce000, 0x00042000); + INSTANTIATE_TEMPLATE(0x005f0000, 0x001f0000); INSTANTIATE_TEMPLATE(0x00780000, 0x00000000); + INSTANTIATE_TEMPLATE(0x00870210, 0x00000000); INSTANTIATE_TEMPLATE(0x00c00000, 0x00000000); INSTANTIATE_TEMPLATE(0x00c00000, 0x00800000); INSTANTIATE_TEMPLATE(0x00c00000, 0x00c00000); + INSTANTIATE_TEMPLATE(0x00c00010, 0x00800000); + INSTANTIATE_TEMPLATE(0x00ca1e10, 0x00000000); INSTANTIATE_TEMPLATE(0x01000010, 0x00000000); INSTANTIATE_TEMPLATE(0x20000800, 0x00000000); INSTANTIATE_TEMPLATE(0x20008000, 0x00000000); @@ -312,14 +360,16 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask, uint32_t value) { bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { // EitherOr optimisation: if there are only one or two patterns in the table, // try to optimise the node to exploit that. - if ((pattern_table_.size() == 2) && (GetSampledBitsCount() > 1)) { + size_t table_size = pattern_table_.size(); + if ((table_size <= 2) && (GetSampledBitsCount() > 1)) { // TODO: support 'x' in this optimisation by dropping the sampled bit // positions before making the mask/value. if ((strchr(pattern_table_[0].pattern, 'x') == NULL) && - (strcmp(pattern_table_[1].pattern, "otherwise") == 0)) { + ((table_size == 1) || + (strcmp(pattern_table_[1].pattern, "otherwise") == 0))) { // A pattern table consisting of a fixed pattern with no x's, and an - // "otherwise" case. Optimise this into an instruction mask and value - // test. + // "otherwise" or absent case. Optimise this into an instruction mask and + // value test. uint32_t single_decode_mask = 0; uint32_t single_decode_value = 0; std::vector<uint8_t> bits = GetSampledBits(); @@ -332,7 +382,6 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { single_decode_value |= 1U << bits[i]; } } - BitExtractFn bit_extract_fn = GetBitExtractFunction(single_decode_mask, single_decode_value); @@ -342,7 +391,9 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { // Set DecodeNode for when the instruction after masking doesn't match the // value. - CompileNodeForBits(decoder, pattern_table_[1].handler, 0); + const char* doesnt_match_handler = + (table_size == 1) ? "VisitUnallocated" : pattern_table_[1].handler; + CompileNodeForBits(decoder, doesnt_match_handler, 0); // Set DecodeNode for when it does match. 
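// [Editor's illustration, not part of the patch.] As a worked example of the
// optimisation above: a node that samples bits {5, 3, 2} and holds the
// single, 'x'-free pattern "110" yields single_decode_mask =
// (1 << 5) | (1 << 3) | (1 << 2) = 0x2c and single_decode_value =
// (1 << 5) | (1 << 3) = 0x28, so the compiled node reduces to the test
// (instr & 0x2c) == 0x28. The non-matching case was routed just above (to the
// "otherwise" handler, or to "VisitUnallocated" for a one-entry table); the
// call below handles the match.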
CompileNodeForBits(decoder, pattern_table_[0].handler, 1); diff --git a/src/aarch64/decoder-aarch64.h b/src/aarch64/decoder-aarch64.h index c0f47c36..38540195 100644 --- a/src/aarch64/decoder-aarch64.h +++ b/src/aarch64/decoder-aarch64.h @@ -38,99 +38,239 @@ // List macro containing all visitors needed by the decoder class. -#define VISITOR_LIST_THAT_RETURN(V) \ - V(AddSubExtended) \ - V(AddSubImmediate) \ - V(AddSubShifted) \ - V(AddSubWithCarry) \ - V(AtomicMemory) \ - V(Bitfield) \ - V(CompareBranch) \ - V(ConditionalBranch) \ - V(ConditionalCompareImmediate) \ - V(ConditionalCompareRegister) \ - V(ConditionalSelect) \ - V(Crypto2RegSHA) \ - V(Crypto3RegSHA) \ - V(CryptoAES) \ - V(DataProcessing1Source) \ - V(DataProcessing2Source) \ - V(DataProcessing3Source) \ - V(Exception) \ - V(Extract) \ - V(EvaluateIntoFlags) \ - V(FPCompare) \ - V(FPConditionalCompare) \ - V(FPConditionalSelect) \ - V(FPDataProcessing1Source) \ - V(FPDataProcessing2Source) \ - V(FPDataProcessing3Source) \ - V(FPFixedPointConvert) \ - V(FPImmediate) \ - V(FPIntegerConvert) \ - V(LoadLiteral) \ - V(LoadStoreExclusive) \ - V(LoadStorePAC) \ - V(LoadStorePairNonTemporal) \ - V(LoadStorePairOffset) \ - V(LoadStorePairPostIndex) \ - V(LoadStorePairPreIndex) \ - V(LoadStorePostIndex) \ - V(LoadStorePreIndex) \ - V(LoadStoreRCpcUnscaledOffset) \ - V(LoadStoreRegisterOffset) \ - V(LoadStoreUnscaledOffset) \ - V(LoadStoreUnsignedOffset) \ - V(LogicalImmediate) \ - V(LogicalShifted) \ - V(MoveWideImmediate) \ - V(NEON2RegMisc) \ - V(NEON2RegMiscFP16) \ - V(NEON3Different) \ - V(NEON3Same) \ - V(NEON3SameExtra) \ - V(NEON3SameFP16) \ - V(NEONAcrossLanes) \ - V(NEONByIndexedElement) \ - V(NEONCopy) \ - V(NEONExtract) \ - V(NEONLoadStoreMultiStruct) \ - V(NEONLoadStoreMultiStructPostIndex) \ - V(NEONLoadStoreSingleStruct) \ - V(NEONLoadStoreSingleStructPostIndex) \ - V(NEONModifiedImmediate) \ - V(NEONPerm) \ - V(NEONScalar2RegMisc) \ - V(NEONScalar2RegMiscFP16) \ - V(NEONScalar3Diff) \ - V(NEONScalar3Same) \ - V(NEONScalar3SameExtra) \ - V(NEONScalar3SameFP16) \ - V(NEONScalarByIndexedElement) \ - V(NEONScalarCopy) \ - V(NEONScalarPairwise) \ - V(NEONScalarShiftImmediate) \ - V(NEONShiftImmediate) \ - V(NEONTable) \ - V(PCRelAddressing) \ - V(RotateRightIntoFlags) \ - V(System) \ - V(TestBranch) \ - V(UnconditionalBranch) \ - V(UnconditionalBranchToRegister) - -// TODO: We shouldn't expose debug-only behaviour like this. Instead, we should -// use release-mode aborts where appropriate, and merge thse into a single -// no-return list. 
-#define VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(V) \ - V(Unallocated) \ +#define VISITOR_LIST_THAT_RETURN(V) \ + V(AddSubExtended) \ + V(AddSubImmediate) \ + V(AddSubShifted) \ + V(AddSubWithCarry) \ + V(AtomicMemory) \ + V(Bitfield) \ + V(CompareBranch) \ + V(ConditionalBranch) \ + V(ConditionalCompareImmediate) \ + V(ConditionalCompareRegister) \ + V(ConditionalSelect) \ + V(Crypto2RegSHA) \ + V(Crypto3RegSHA) \ + V(CryptoAES) \ + V(DataProcessing1Source) \ + V(DataProcessing2Source) \ + V(DataProcessing3Source) \ + V(EvaluateIntoFlags) \ + V(Exception) \ + V(Extract) \ + V(FPCompare) \ + V(FPConditionalCompare) \ + V(FPConditionalSelect) \ + V(FPDataProcessing1Source) \ + V(FPDataProcessing2Source) \ + V(FPDataProcessing3Source) \ + V(FPFixedPointConvert) \ + V(FPImmediate) \ + V(FPIntegerConvert) \ + V(LoadLiteral) \ + V(LoadStoreExclusive) \ + V(LoadStorePAC) \ + V(LoadStorePairNonTemporal) \ + V(LoadStorePairOffset) \ + V(LoadStorePairPostIndex) \ + V(LoadStorePairPreIndex) \ + V(LoadStorePostIndex) \ + V(LoadStorePreIndex) \ + V(LoadStoreRCpcUnscaledOffset) \ + V(LoadStoreRegisterOffset) \ + V(LoadStoreUnscaledOffset) \ + V(LoadStoreUnsignedOffset) \ + V(LogicalImmediate) \ + V(LogicalShifted) \ + V(MoveWideImmediate) \ + V(NEON2RegMisc) \ + V(NEON2RegMiscFP16) \ + V(NEON3Different) \ + V(NEON3Same) \ + V(NEON3SameExtra) \ + V(NEON3SameFP16) \ + V(NEONAcrossLanes) \ + V(NEONByIndexedElement) \ + V(NEONCopy) \ + V(NEONExtract) \ + V(NEONLoadStoreMultiStruct) \ + V(NEONLoadStoreMultiStructPostIndex) \ + V(NEONLoadStoreSingleStruct) \ + V(NEONLoadStoreSingleStructPostIndex) \ + V(NEONModifiedImmediate) \ + V(NEONPerm) \ + V(NEONScalar2RegMisc) \ + V(NEONScalar2RegMiscFP16) \ + V(NEONScalar3Diff) \ + V(NEONScalar3Same) \ + V(NEONScalar3SameExtra) \ + V(NEONScalar3SameFP16) \ + V(NEONScalarByIndexedElement) \ + V(NEONScalarCopy) \ + V(NEONScalarPairwise) \ + V(NEONScalarShiftImmediate) \ + V(NEONShiftImmediate) \ + V(NEONTable) \ + V(PCRelAddressing) \ + V(RotateRightIntoFlags) \ + V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitGatherLoad_VectorPlusImm) \ + V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_VectorPlusImm) \ + V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitScatterStore_VectorPlusImm) \ + V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_VectorPlusImm) \ + V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_VectorPlusImm) \ + V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitScatterStore_VectorPlusImm) \ + V(SVEAddressGeneration) \ + V(SVEBitwiseLogicalUnpredicated) \ + V(SVEBitwiseShiftUnpredicated) \ + V(SVEFFRInitialise) \ + V(SVEFFRWriteFromPredicate) \ + V(SVEFPAccumulatingReduction) \ + V(SVEFPArithmeticUnpredicated) \ + V(SVEFPCompareVectors) \ + V(SVEFPCompareWithZero) \ + V(SVEFPComplexAddition) \ + 
V(SVEFPComplexMulAdd) \ + V(SVEFPComplexMulAddIndex) \ + V(SVEFPFastReduction) \ + V(SVEFPMulIndex) \ + V(SVEFPMulAdd) \ + V(SVEFPMulAddIndex) \ + V(SVEFPUnaryOpUnpredicated) \ + V(SVEIncDecByPredicateCount) \ + V(SVEIndexGeneration) \ + V(SVEIntArithmeticUnpredicated) \ + V(SVEIntCompareSignedImm) \ + V(SVEIntCompareUnsignedImm) \ + V(SVEIntCompareVectors) \ + V(SVEIntMulAddPredicated) \ + V(SVEIntMulAddUnpredicated) \ + V(SVEIntReduction) \ + V(SVEIntUnaryArithmeticPredicated) \ + V(SVEMovprfx) \ + V(SVEMulIndex) \ + V(SVEPermuteVectorExtract) \ + V(SVEPermuteVectorInterleaving) \ + V(SVEPredicateCount) \ + V(SVEPredicateLogical) \ + V(SVEPropagateBreak) \ + V(SVEStackFrameAdjustment) \ + V(SVEStackFrameSize) \ + V(SVEVectorSelect) \ + V(SVEBitwiseLogical_Predicated) \ + V(SVEBitwiseLogicalWithImm_Unpredicated) \ + V(SVEBitwiseShiftByImm_Predicated) \ + V(SVEBitwiseShiftByVector_Predicated) \ + V(SVEBitwiseShiftByWideElements_Predicated) \ + V(SVEBroadcastBitmaskImm) \ + V(SVEBroadcastFPImm_Unpredicated) \ + V(SVEBroadcastGeneralRegister) \ + V(SVEBroadcastIndexElement) \ + V(SVEBroadcastIntImm_Unpredicated) \ + V(SVECompressActiveElements) \ + V(SVEConditionallyBroadcastElementToVector) \ + V(SVEConditionallyExtractElementToSIMDFPScalar) \ + V(SVEConditionallyExtractElementToGeneralRegister) \ + V(SVEConditionallyTerminateScalars) \ + V(SVEConstructivePrefix_Unpredicated) \ + V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \ + V(SVEContiguousLoad_ScalarPlusImm) \ + V(SVEContiguousLoad_ScalarPlusScalar) \ + V(SVEContiguousNonFaultLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \ + V(SVEContiguousNonTemporalStore_ScalarPlusImm) \ + V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \ + V(SVEContiguousPrefetch_ScalarPlusImm) \ + V(SVEContiguousPrefetch_ScalarPlusScalar) \ + V(SVEContiguousStore_ScalarPlusImm) \ + V(SVEContiguousStore_ScalarPlusScalar) \ + V(SVECopySIMDFPScalarRegisterToVector_Predicated) \ + V(SVECopyFPImm_Predicated) \ + V(SVECopyGeneralRegisterToVector_Predicated) \ + V(SVECopyIntImm_Predicated) \ + V(SVEElementCount) \ + V(SVEExtractElementToSIMDFPScalarRegister) \ + V(SVEExtractElementToGeneralRegister) \ + V(SVEFPArithmetic_Predicated) \ + V(SVEFPArithmeticWithImm_Predicated) \ + V(SVEFPConvertPrecision) \ + V(SVEFPConvertToInt) \ + V(SVEFPExponentialAccelerator) \ + V(SVEFPRoundToIntegralValue) \ + V(SVEFPTrigMulAddCoefficient) \ + V(SVEFPTrigSelectCoefficient) \ + V(SVEFPUnaryOp) \ + V(SVEIncDecRegisterByElementCount) \ + V(SVEIncDecVectorByElementCount) \ + V(SVEInsertSIMDFPScalarRegister) \ + V(SVEInsertGeneralRegister) \ + V(SVEIntAddSubtractImm_Unpredicated) \ + V(SVEIntAddSubtractVectors_Predicated) \ + V(SVEIntCompareScalarCountAndLimit) \ + V(SVEIntConvertToFP) \ + V(SVEIntDivideVectors_Predicated) \ + V(SVEIntMinMaxImm_Unpredicated) \ + V(SVEIntMinMaxDifference_Predicated) \ + V(SVEIntMulImm_Unpredicated) \ + V(SVEIntMulVectors_Predicated) \ + V(SVELoadAndBroadcastElement) \ + V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \ + V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \ + V(SVELoadMultipleStructures_ScalarPlusImm) \ + V(SVELoadMultipleStructures_ScalarPlusScalar) \ + V(SVELoadPredicateRegister) \ + V(SVELoadVectorRegister) \ + V(SVEPartitionBreakCondition) \ + V(SVEPermutePredicateElements) \ + V(SVEPredicateFirstActive) \ + V(SVEPredicateInitialize) \ + V(SVEPredicateNextActive) \ + V(SVEPredicateReadFromFFR_Predicated) \ + V(SVEPredicateReadFromFFR_Unpredicated) \ + 
V(SVEPredicateTest) \ + V(SVEPredicateZero) \ + V(SVEPropagateBreakToNextPartition) \ + V(SVEReversePredicateElements) \ + V(SVEReverseVectorElements) \ + V(SVEReverseWithinElements) \ + V(SVESaturatingIncDecRegisterByElementCount) \ + V(SVESaturatingIncDecVectorByElementCount) \ + V(SVEStoreMultipleStructures_ScalarPlusImm) \ + V(SVEStoreMultipleStructures_ScalarPlusScalar) \ + V(SVEStorePredicateRegister) \ + V(SVEStoreVectorRegister) \ + V(SVETableLookup) \ + V(SVEUnpackPredicateElements) \ + V(SVEUnpackVectorElements) \ + V(SVEVectorSplice_Destructive) \ + V(System) \ + V(TestBranch) \ + V(Unallocated) \ + V(UnconditionalBranch) \ + V(UnconditionalBranchToRegister) \ + V(Unimplemented) #define VISITOR_LIST_THAT_DONT_RETURN(V) V(Reserved) -#define VISITOR_LIST(V) \ - VISITOR_LIST_THAT_RETURN(V) \ - VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(V) \ +#define VISITOR_LIST(V) \ + VISITOR_LIST_THAT_RETURN(V) \ + VISITOR_LIST_THAT_DONT_RETURN(V) namespace vixl { @@ -138,6 +278,12 @@ namespace aarch64 { // The Visitor interface. Disassembler and simulator (and other tools) // must provide implementations for all of these functions. +// +// Note that this class must change in breaking ways with even minor additions +// to VIXL, and so its API should be considered unstable. User classes that +// inherit from this one should be expected to break even on minor version +// updates. If this is a problem, consider using DecoderVisitorWithDefaults +// instead. class DecoderVisitor { public: enum VisitorConstness { kConstVisitor, kNonConstVisitor }; @@ -160,6 +306,25 @@ class DecoderVisitor { const VisitorConstness constness_; }; +// As above, but a default (no-op) implementation for each visitor is provided. +// This is useful for derived classes that only care about specific visitors. +// +// A minor version update may add a visitor, but will never remove one, so it is +// safe (and recommended) to use `override` in derived classes.
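// [Editor's illustration, not part of the patch.] A minimal sketch of how the
// class declared just below might be used: override only the visitors of
// interest and let every other instruction class fall through to the no-op
// default. The counter here is hypothetical, and registration through
// Decoder::AppendVisitor is assumed from VIXL's existing decoder API.
//
//   class BranchCounter : public DecoderVisitorWithDefaults {
//    public:
//     void VisitUnconditionalBranch(const Instruction* instr) VIXL_OVERRIDE {
//       USE(instr);
//       count++;  // Counts direct B/BL instructions only.
//     }
//     int count = 0;
//   };
//
//   BranchCounter counter;
//   decoder.AppendVisitor(&counter);  // Then decode as usual.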
+class DecoderVisitorWithDefaults : public DecoderVisitor { + public: + explicit DecoderVisitorWithDefaults( + VisitorConstness constness = kConstVisitor) + : DecoderVisitor(constness) {} + + virtual ~DecoderVisitorWithDefaults() {} + +#define DECLARE(A) \ + virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE { USE(instr); } + VISITOR_LIST(DECLARE) +#undef DECLARE +}; + class DecodeNode; class CompiledDecodeNode; @@ -257,7 +422,7 @@ class Decoder { }; const int kMaxDecodeSampledBits = 16; -const int kMaxDecodeMappings = 22; +const int kMaxDecodeMappings = 100; typedef void (Decoder::*DecodeFnPtr)(const Instruction*); typedef uint32_t (Instruction::*BitExtractFn)(void) const; diff --git a/src/aarch64/decoder-constants-aarch64.h b/src/aarch64/decoder-constants-aarch64.h index def27fa1..53f283bb 100644 --- a/src/aarch64/decoder-constants-aarch64.h +++ b/src/aarch64/decoder-constants-aarch64.h @@ -39,6 +39,7 @@ static const DecodeMapping kDecodeMapping[] = { { "Root", {28, 27, 26, 25}, { {"0000", "DecodeReserved"}, + {"0010", "DecodeSVE"}, {"100x", "DecodeDataProcessingImmediate"}, {"101x", "DecodeBranchesExceptionAndSystem"}, {"x1x0", "DecodeLoadsAndStores"}, @@ -124,6 +125,720 @@ static const DecodeMapping kDecodeMapping[] = { }, }, + { "DecodeSVE", + {31, 30, 29, 24, 21, 15, 14, 13}, + { {"00000x1x", "VisitSVEIntMulAddPredicated"}, + {"00000000", "DecodeSVE00000000"}, + {"00000001", "DecodeSVE00000001"}, + {"00000100", "DecodeSVE00000100"}, + {"00000101", "VisitSVEIntUnaryArithmeticPredicated"}, + {"00001000", "VisitSVEIntArithmeticUnpredicated"}, + {"00001001", "VisitSVEBitwiseLogicalUnpredicated"}, + {"00001010", "DecodeSVE00001010"}, + {"00001100", "VisitSVEBitwiseShiftUnpredicated"}, + {"00001101", "DecodeSVE00001101"}, + {"00001110", "DecodeSVE00001110"}, + {"00001111", "DecodeSVE00001111"}, + {"000100xx", "DecodeSVE000100xx"}, + {"0001010x", "DecodeSVE0001010x"}, + {"00010110", "DecodeSVE00010110"}, + {"00010111", "DecodeSVE00010111"}, + {"00011000", "VisitSVEPermuteVectorExtract"}, + {"00011001", "DecodeSVE00011001"}, + {"00011010", "DecodeSVE00011010"}, + {"00011011", "VisitSVEPermuteVectorInterleaving"}, + {"00011100", "DecodeSVE00011100"}, + {"00011101", "DecodeSVE00011101"}, + {"0001111x", "VisitSVEVectorSelect"}, + {"00100xxx", "VisitSVEIntCompareVectors"}, + {"00101xxx", "VisitSVEIntCompareUnsignedImm"}, + {"00110x0x", "VisitSVEIntCompareSignedImm"}, + {"0011001x", "DecodeSVE0011001x"}, + {"00110110", "DecodeSVE00110110"}, + {"00110111", "DecodeSVE00110111"}, + {"00111000", "VisitSVEIntCompareScalarCountAndLimit"}, + {"00111001", "UnallocSVEConditionallyTerminateScalars"}, + {"00111100", "DecodeSVE00111100"}, + {"00111101", "UnallocSVEPredicateCount"}, + {"0011111x", "DecodeSVE0011111x"}, + {"010000xx", "VisitSVEIntMulAddUnpredicated"}, + {"01001xxx", "VisitSVEMulIndex"}, + {"011000xx", "VisitSVEFPComplexMulAdd"}, + {"01100100", "UnallocSVEFPComplexAddition"}, + {"01101000", "DecodeSVE01101000"}, + {"01101001", "UnallocSVEFPMulIndex"}, + {"01110x1x", "VisitSVEFPCompareVectors"}, + {"01110000", "VisitSVEFPArithmeticUnpredicated"}, + {"01110001", "DecodeSVE01110001"}, + {"01110100", "DecodeSVE01110100"}, + {"01110101", "DecodeSVE01110101"}, + {"01111xxx", "VisitSVEFPMulAdd"}, + {"100x010x", "UnallocSVELoadAndBroadcastElement"}, + {"100x0110", "DecodeSVE100x0110"}, + {"100x0111", "DecodeSVE100x0111"}, + {"100x11xx", "DecodeSVE100x11xx"}, + {"100000xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, + {"100010xx", "DecodeSVE100010xx"}, + {"100100x1", 
"DecodeSVE100100x1"}, + {"10010000", "DecodeSVE10010000"}, + {"10010010", "DecodeSVE10010010"}, + {"100110x1", "DecodeSVE100110x1"}, + {"10011000", "DecodeSVE10011000"}, + {"10011010", "DecodeSVE10011010"}, + {"101xx000", "VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar"}, + {"101xx001", "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm"}, + {"101xx010", "VisitSVEContiguousLoad_ScalarPlusScalar"}, + {"101xx011", "VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar"}, + {"101xx101", "DecodeSVE101xx101"}, + {"101x0110", "DecodeSVE101x0110"}, + {"101x0111", "DecodeSVE101x0111"}, + {"101x1110", "VisitSVELoadMultipleStructures_ScalarPlusScalar"}, + {"101x1111", "DecodeSVE101x1111"}, + {"110x00xx", "VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets"}, + {"110x0111", "DecodeSVE110x0111"}, + {"1100010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + {"11000110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + {"110010xx", "DecodeSVE110010xx"}, + {"110011xx", "DecodeSVE110011xx"}, + {"1101010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + {"11010110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + {"110110xx", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"}, + {"110111xx", "DecodeSVE110111xx"}, + {"111x0011", "DecodeSVE111x0011"}, + {"111x01x0", "DecodeSVE111x01x0"}, + {"111x0101", "DecodeSVE111x0101"}, + {"111x0111", "DecodeSVE111x0111"}, + {"111x1011", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"}, + {"111x11x0", "DecodeSVE111x11x0"}, + {"111x1101", "DecodeSVE111x1101"}, + {"111x1111", "DecodeSVE111x1111"}, + {"1110x010", "VisitSVEContiguousStore_ScalarPlusScalar"}, + {"1111x000", "UnallocSVEStorePredicateRegister"}, + {"1111x010", "DecodeSVE1111x010"}, + }, + }, + + { "DecodeSVE00000000", + {20, 19, 18}, + { {"00x", "VisitSVEIntAddSubtractVectors_Predicated"}, + {"01x", "VisitSVEIntMinMaxDifference_Predicated"}, + {"100", "VisitSVEIntMulVectors_Predicated"}, + {"101", "VisitSVEIntDivideVectors_Predicated"}, + {"11x", "VisitSVEBitwiseLogical_Predicated"}, + }, + }, + + { "DecodeSVE00000100", + {20, 19}, + { {"0x", "VisitSVEBitwiseShiftByImm_Predicated"}, + {"10", "VisitSVEBitwiseShiftByVector_Predicated"}, + {"11", "VisitSVEBitwiseShiftByWideElements_Predicated"}, + }, + }, + + { "DecodeSVE00001010", + {23, 12, 11}, + { {"x0x", "VisitSVEIndexGeneration"}, + {"010", "VisitSVEStackFrameAdjustment"}, + {"110", "UnallocSVEStackFrameSize"}, + }, + }, + + { "UnallocSVEStackFrameSize", + {22, 20, 19, 18, 17, 16}, + { {"011111", "VisitSVEStackFrameSize"}, + }, + }, + + { "DecodeSVE00001101", + {12, 11, 10}, + { {"0xx", "VisitSVEAddressGeneration"}, + {"10x", "VisitSVEFPTrigSelectCoefficient"}, + {"110", "VisitSVEFPExponentialAccelerator"}, + {"111", "VisitSVEConstructivePrefix_Unpredicated"}, + }, + }, + + { "DecodeSVE00001110", + {20, 12, 11}, + { {"00x", "VisitSVESaturatingIncDecVectorByElementCount"}, + {"100", "VisitSVEIncDecVectorByElementCount"}, + }, + }, + + { "DecodeSVE00001111", + {20, 12, 11}, + { {"x1x", "VisitSVESaturatingIncDecRegisterByElementCount"}, + {"000", "VisitSVEElementCount"}, + {"100", "VisitSVEIncDecRegisterByElementCount"}, + }, + }, + + { "DecodeSVE000100xx", + {23, 22, 20, 19, 18}, + { {"xx1xx", "VisitSVECopyIntImm_Predicated"}, + {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"11000", "VisitSVEBroadcastBitmaskImm"}, + }, + }, + + { "DecodeSVE0001010x", + {23, 22, 20, 19, 18}, + { {"0x000", 
"VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"11000", "VisitSVEBroadcastBitmaskImm"}, + }, + }, + + { "DecodeSVE00010110", + {23, 22, 20, 19, 18}, + { {"xx1xx", "VisitSVECopyFPImm_Predicated"}, + {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"11000", "VisitSVEBroadcastBitmaskImm"}, + }, + }, + + { "DecodeSVE00010111", + {23, 22, 20, 19, 18}, + { {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"11000", "VisitSVEBroadcastBitmaskImm"}, + }, + }, + + { "UnallocSVEBroadcastIndexElement", + {10}, + { {"0", "VisitSVEBroadcastIndexElement"}, + }, + }, + + { "UnallocSVETableLookup", + {10}, + { {"0", "VisitSVETableLookup"}, + }, + }, + + { "UnallocSVEBroadcastGeneralRegister", + {17, 16, 10}, + { {"000", "VisitSVEBroadcastGeneralRegister"}, + }, + }, + + { "UnallocSVEInsertGeneralRegister", + {17, 16, 10}, + { {"000", "VisitSVEInsertGeneralRegister"}, + }, + }, + + { "UnallocSVEUnpackVectorElements", + {10}, + { {"0", "VisitSVEUnpackVectorElements"}, + }, + }, + + { "UnallocSVEInsertSIMDFPScalarRegister", + {17, 16, 10}, + { {"000", "VisitSVEInsertSIMDFPScalarRegister"}, + }, + }, + + { "UnallocSVEReverseVectorElements", + {17, 16, 10}, + { {"000", "VisitSVEReverseVectorElements"}, + }, + }, + + { "DecodeSVE00011001", + {20, 19, 18, 12, 11}, + { {"xxx00", "UnallocSVEBroadcastIndexElement"}, + {"xxx10", "UnallocSVETableLookup"}, + {"00011", "UnallocSVEBroadcastGeneralRegister"}, + {"00111", "UnallocSVEInsertGeneralRegister"}, + {"10011", "UnallocSVEUnpackVectorElements"}, + {"10111", "UnallocSVEInsertSIMDFPScalarRegister"}, + {"11011", "UnallocSVEReverseVectorElements"}, + }, + }, + + { "UnallocSVEPermutePredicateElements", + {9, 4}, + { {"00", "VisitSVEPermutePredicateElements"}, + }, + }, + + { "UnallocSVEUnpackPredicateElements", + {23, 22, 19, 17, 12, 11, 10, 9, 4}, + { {"000000000", "VisitSVEUnpackPredicateElements"}, + }, + }, + + { "UnallocSVEReversePredicateElements", + {19, 17, 16, 12, 11, 10, 9, 4}, + { {"00000000", "VisitSVEReversePredicateElements"}, + }, + }, + + { "DecodeSVE00011010", + {20, 18}, + { {"0x", "UnallocSVEPermutePredicateElements"}, + {"10", "UnallocSVEUnpackPredicateElements"}, + {"11", "UnallocSVEReversePredicateElements"}, + }, + }, + + { "DecodeSVE00011100", + {23, 20, 19, 18, 17, 16}, + { {"x00000", "VisitSVECopySIMDFPScalarRegisterToVector_Predicated"}, + {"x0001x", "VisitSVEExtractElementToSIMDFPScalarRegister"}, + {"x001xx", "VisitSVEReverseWithinElements"}, + {"x0100x", "VisitSVEConditionallyBroadcastElementToVector"}, + {"x0101x", "VisitSVEConditionallyExtractElementToSIMDFPScalar"}, + {"x01100", "VisitSVEVectorSplice_Destructive"}, + {"100001", "VisitSVECompressActiveElements"}, + }, + }, + + { "DecodeSVE00011101", + {20, 19, 18, 17, 16}, + { {"0000x", "VisitSVEExtractElementToGeneralRegister"}, + {"01000", "VisitSVECopyGeneralRegisterToVector_Predicated"}, + {"1000x", "VisitSVEConditionallyExtractElementToGeneralRegister"}, + }, + }, + + { "UnallocSVEPartitionBreakCondition", + {18, 17, 16, 9}, + { {"0000", "VisitSVEPartitionBreakCondition"}, + }, + }, + + { "UnallocSVEPropagateBreakToNextPartition", + {23, 18, 17, 16, 9, 4}, + { {"000000", "VisitSVEPropagateBreakToNextPartition"}, + }, + }, + + { "DecodeSVE0011001x", + {20, 19}, + { {"0x", "VisitSVEPredicateLogical"}, + {"10", "UnallocSVEPartitionBreakCondition"}, + {"11", 
"UnallocSVEPropagateBreakToNextPartition"}, + }, + }, + + { "UnallocSVEPredicateTest", + {18, 17, 9, 4}, + { {"0000", "VisitSVEPredicateTest"}, + }, + }, + + { "UnallocSVEPredicateFirstActive", + {18, 17, 12, 11, 10, 9, 4}, + { {"0000000", "VisitSVEPredicateFirstActive"}, + }, + }, + + { "UnallocSVEPredicateNextActive", + {18, 17, 12, 11, 10, 9, 4}, + { {"0000100", "VisitSVEPredicateNextActive"}, + }, + }, + + { "DecodeSVE00110110", + {20, 19, 16}, + { {"0xx", "VisitSVEPropagateBreak"}, + {"100", "UnallocSVEPredicateTest"}, + {"110", "UnallocSVEPredicateFirstActive"}, + {"111", "UnallocSVEPredicateNextActive"}, + }, + }, + + { "UnallocSVEPredicateTest", + {18, 17, 9, 4}, + { {"0000", "VisitSVEPredicateTest"}, + }, + }, + + { "UnallocSVEPredicateInitialize", + {18, 17, 11, 4}, + { {"0000", "VisitSVEPredicateInitialize"}, + }, + }, + + { "UnallocSVEPredicateZero", + {18, 17, 11, 9, 8, 7, 6, 5, 4}, + { {"000000000", "VisitSVEPredicateZero"}, + }, + }, + + { "UnallocSVEPredicateReadFromFFR_Predicated", + {18, 17, 11, 9, 4}, + { {"00000", "VisitSVEPredicateReadFromFFR_Predicated"}, + }, + }, + + { "UnallocSVEPredicateReadFromFFR_Unpredicated", + {18, 17, 11, 9, 8, 7, 6, 5, 4}, + { {"000000000", "VisitSVEPredicateReadFromFFR_Unpredicated"}, + }, + }, + + { "DecodeSVE00110111", + {20, 19, 16, 12, 10}, + { {"0xxxx", "VisitSVEPropagateBreak"}, + {"100xx", "UnallocSVEPredicateTest"}, + {"11x00", "UnallocSVEPredicateInitialize"}, + {"11001", "UnallocSVEPredicateZero"}, + {"11010", "UnallocSVEPredicateReadFromFFR_Predicated"}, + {"11110", "UnallocSVEPredicateReadFromFFR_Unpredicated"}, + }, + }, + + { "UnallocSVEConditionallyTerminateScalars", + {12, 11, 10, 3, 2, 1, 0}, + { {"0000000", "VisitSVEConditionallyTerminateScalars"}, + }, + }, + + { "UnallocSVEPredicateCount_2", + {20}, + { {"0", "VisitSVEPredicateCount"}, + }, + }, + + { "UnallocSVEIncDecByPredicateCount", + {20}, + { {"0", "VisitSVEIncDecByPredicateCount"}, + }, + }, + + { "UnallocSVEFFRWriteFromPredicate", + {20, 17, 16, 11, 10, 9, 4, 3, 2, 1, 0}, + { {"00000000000", "VisitSVEFFRWriteFromPredicate"}, + }, + }, + + { "UnallocSVEFFRInitialise", + {20, 17, 16, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, + { {"000000000000000", "VisitSVEFFRInitialise"}, + }, + }, + + { "DecodeSVE00111100", + {19, 18, 12}, + { {"0xx", "UnallocSVEPredicateCount_2"}, + {"1x0", "UnallocSVEIncDecByPredicateCount"}, + {"101", "UnallocSVEFFRWriteFromPredicate"}, + {"111", "UnallocSVEFFRInitialise"}, + }, + }, + + { "UnallocSVEPredicateCount", + {20, 19}, + { {"00", "VisitSVEPredicateCount"}, + }, + }, + + { "DecodeSVE0011111x", + {20, 19, 16}, + { {"00x", "VisitSVEIntAddSubtractImm_Unpredicated"}, + {"01x", "VisitSVEIntMinMaxImm_Unpredicated"}, + {"10x", "VisitSVEIntMulImm_Unpredicated"}, + {"110", "VisitSVEBroadcastIntImm_Unpredicated"}, + {"111", "VisitSVEBroadcastFPImm_Unpredicated"}, + }, + }, + + { "UnallocSVEFPComplexAddition", + {20, 19, 18, 17}, + { {"0000", "VisitSVEFPComplexAddition"}, + }, + }, + + { "DecodeSVE01101000", + {12, 11}, + { {"00", "VisitSVEFPMulAddIndex"}, + {"1x", "VisitSVEFPComplexMulAddIndex"}, + }, + }, + + { "UnallocSVEFPMulIndex", + {12, 11, 10}, + { {"000", "VisitSVEFPMulIndex"}, + }, + }, + + { "DecodeSVE01110001", + {20, 19, 12}, + { {"00x", "VisitSVEFPFastReduction"}, + {"011", "VisitSVEFPUnaryOpUnpredicated"}, + {"10x", "VisitSVEFPCompareWithZero"}, + {"11x", "VisitSVEFPAccumulatingReduction"}, + }, + }, + + { "UnallocSVEFPTrigMulAddCoefficient", + {12, 11, 10}, + { {"000", "VisitSVEFPTrigMulAddCoefficient"}, + }, + }, + + { 
"UnallocSVEFPArithmeticWithImm_Predicated", + {9, 8, 7, 6}, + { {"0000", "VisitSVEFPArithmeticWithImm_Predicated"}, + }, + }, + + { "DecodeSVE01110100", + {20, 19}, + { {"0x", "VisitSVEFPArithmetic_Predicated"}, + {"10", "UnallocSVEFPTrigMulAddCoefficient"}, + {"11", "UnallocSVEFPArithmeticWithImm_Predicated"}, + }, + }, + + { "DecodeSVE01110101", + {20, 19, 18}, + { {"00x", "VisitSVEFPRoundToIntegralValue"}, + {"010", "VisitSVEFPConvertPrecision"}, + {"011", "VisitSVEFPUnaryOp"}, + {"10x", "VisitSVEIntConvertToFP"}, + {"11x", "VisitSVEFPConvertToInt"}, + }, + }, + + { "UnallocSVELoadAndBroadcastElement", + {22}, + { {"1", "VisitSVELoadAndBroadcastElement"}, + }, + }, + + { "DecodeSVE100x0110", + {22, 4}, + { {"00", "VisitSVEContiguousPrefetch_ScalarPlusScalar"}, + {"1x", "VisitSVELoadAndBroadcastElement"}, + }, + }, + + { "DecodeSVE100x0111", + {22, 4}, + { {"00", "VisitSVE32BitGatherPrefetch_VectorPlusImm"}, + {"1x", "VisitSVELoadAndBroadcastElement"}, + }, + }, + + { "DecodeSVE100x11xx", + {22}, + { {"0", "VisitSVE32BitGatherLoad_VectorPlusImm"}, + {"1", "VisitSVELoadAndBroadcastElement"}, + }, + }, + + { "DecodeSVE100010xx", + {23, 4}, + { {"00", "VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets"}, + {"1x", "VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets"}, + }, + }, + + { "DecodeSVE100100x1", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE10010000", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, + {"100", "VisitSVELoadPredicateRegister"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE10010010", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, + {"10x", "VisitSVELoadVectorRegister"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE100110x1", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE10011000", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, + {"100", "VisitSVELoadPredicateRegister"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE10011010", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, + {"10x", "VisitSVELoadVectorRegister"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm", + {20}, + { {"0", "VisitSVELoadAndBroadcastQuadword_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE101xx101", + {20}, + { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"}, + {"1", "VisitSVEContiguousNonFaultLoad_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE101x0110", + {22}, + { {"0", "VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar"}, + {"1", "VisitSVELoadMultipleStructures_ScalarPlusScalar"}, + }, + }, + + { "DecodeSVE101x0111", + {22, 20}, + { {"00", "VisitSVEContiguousNonTemporalLoad_ScalarPlusImm"}, + {"10", "VisitSVELoadMultipleStructures_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE101x1111", + {22, 20}, + { {"x0", "VisitSVELoadMultipleStructures_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE110x0111", + {22, 4}, + { {"00", "VisitSVE64BitGatherPrefetch_VectorPlusImm"}, + {"1x", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + }, + }, + + { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", + {22}, + { 
{"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + }, + }, + + { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", + {22}, + { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + }, + }, + + { "DecodeSVE110010xx", + {23, 4}, + { {"00", "VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets"}, + {"1x", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"}, + }, + }, + + { "DecodeSVE110011xx", + {23, 22, 4}, + { {"x0x", "VisitSVE64BitGatherLoad_VectorPlusImm"}, + {"010", "VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets"}, + {"11x", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"}, + }, + }, + + { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", + {22}, + { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + }, + }, + + { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", + {22}, + { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + }, + }, + + { "DecodeSVE110111xx", + {22}, + { {"0", "VisitSVE64BitGatherLoad_VectorPlusImm"}, + {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"}, + }, + }, + + { "DecodeSVE111x0011", + {22}, + { {"0", "VisitSVEContiguousNonTemporalStore_ScalarPlusScalar"}, + {"1", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"}, + }, + }, + + { "DecodeSVE111x01x0", + {22}, + { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets"}, + {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets"}, + }, + }, + + { "DecodeSVE111x0101", + {22}, + { {"0", "VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets"}, + {"1", "VisitSVE64BitScatterStore_VectorPlusImm"}, + }, + }, + + { "DecodeSVE111x0111", + {22, 20}, + { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"}, + {"01", "VisitSVEContiguousNonTemporalStore_ScalarPlusImm"}, + {"11", "VisitSVEStoreMultipleStructures_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE111x11x0", + {22}, + { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets"}, + {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets"}, + }, + }, + + { "DecodeSVE111x1101", + {22}, + { {"0", "VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets"}, + {"1", "VisitSVE32BitScatterStore_VectorPlusImm"}, + }, + }, + + { "DecodeSVE111x1111", + {22, 20}, + { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"}, + {"x1", "VisitSVEStoreMultipleStructures_ScalarPlusImm"}, + }, + }, + + { "UnallocSVEStorePredicateRegister", + {23, 22, 4}, + { {"100", "VisitSVEStorePredicateRegister"}, + }, + }, + + { "DecodeSVE1111x010", + {23, 22}, + { {"0x", "VisitSVEContiguousStore_ScalarPlusScalar"}, + {"10", "VisitSVEStoreVectorRegister"}, + {"11", "VisitSVEContiguousStore_ScalarPlusScalar"}, + }, + }, + { "DecodeNEONScalarAnd3SHA", {29, 23, 22, 15, 14, 11, 10}, { {"0xx0x00", "VisitCrypto3RegSHA"}, @@ -1388,6 +2103,28 @@ static const DecodeMapping kDecodeMapping[] = { {"otherwise", "VisitUnconditionalBranchToRegister"}, }, }, + + { "DecodeSVE101xxxxx", + {15, 14, 13}, + { {"101", "DecodeSVE101xx101"}, + {"010", "VisitSVEContiguousLoad_ScalarPlusScalar"}, + {"otherwise", "VisitSVEMemContiguousLoad"}, + }, + }, + + { "DecodeSVE101xx101", + {20}, + { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"}, + {"1", "VisitSVEMemContiguousLoad"}, + }, + }, + + { "DecodeSVE00000001", + {20, 19}, + { {"10", "VisitSVEMovprfx"}, + {"otherwise", "VisitSVEIntReduction"}, + }, + }, }; // clang-format on diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc index 7d6fa148..d8ac2d24 100644 --- 
a/src/aarch64/disasm-aarch64.cc +++ b/src/aarch64/disasm-aarch64.cc @@ -24,6 +24,7 @@ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include <bitset> #include <cstdlib> #include <sstream> @@ -956,7 +957,7 @@ void Disassembler::VisitTestBranch(const Instruction *instr) { // disassembled as Wt, otherwise Xt. As the top bit of the immediate is // encoded in bit 31 of the instruction, we can reuse the Rt form, which // uses bit 31 (normally "sf") to choose the register size. - const char *form = "'Rt, 'IS, 'TImmTest"; + const char *form = "'Rt, 'It, 'TImmTest"; switch (instr->Mask(TestBranchMask)) { case TBZ: @@ -1086,7 +1087,7 @@ void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction *instr) { #undef LS_UNSIGNEDOFFSET case PRFM_unsigned: mnemonic = "prfm"; - form = "'PrefOp, ['Xns'ILU]"; + form = "'prefOp, ['Xns'ILU]"; } Format(instr, mnemonic, form); } @@ -1165,7 +1166,7 @@ void Disassembler::VisitLoadStoreRegisterOffset(const Instruction *instr) { #undef LS_REGISTEROFFSET case PRFM_reg: mnemonic = "prfm"; - form = "'PrefOp, ['Xns, 'Offsetreg]"; + form = "'prefOp, ['Xns, 'Offsetreg]"; } Format(instr, mnemonic, form); } @@ -1180,7 +1181,7 @@ void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction *instr) { const char *form_s = "'St, ['Xns'ILS]"; const char *form_d = "'Dt, ['Xns'ILS]"; const char *form_q = "'Qt, ['Xns'ILS]"; - const char *form_prefetch = "'PrefOp, ['Xns'ILS]"; + const char *form_prefetch = "'prefOp, ['Xns'ILS]"; switch (instr->Mask(LoadStoreUnscaledOffsetMask)) { case STURB_w: @@ -1303,7 +1304,7 @@ void Disassembler::VisitLoadLiteral(const Instruction *instr) { } case PRFM_lit: { mnemonic = "prfm"; - form = "'PrefOp, 'ILLiteral 'LValue"; + form = "'prefOp, 'ILLiteral 'LValue"; break; } default: @@ -1486,14 +1487,14 @@ void Disassembler::VisitLoadStorePairNonTemporal(const Instruction *instr) { V(CASAH, "casah", "'Ws, 'Wt") \ V(CASLH, "caslh", "'Ws, 'Wt") \ V(CASALH, "casalh", "'Ws, 'Wt") \ - V(CASP_w, "casp", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ - V(CASP_x, "casp", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ - V(CASPA_w, "caspa", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ - V(CASPA_x, "caspa", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ - V(CASPL_w, "caspl", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ - V(CASPL_x, "caspl", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ - V(CASPAL_w, "caspal", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ - V(CASPAL_x, "caspal", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") + V(CASP_w, "casp", "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASP_x, "casp", "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPA_w, "caspa", "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPA_x, "caspa", "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPL_w, "caspl", "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPL_x, "caspl", "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPAL_w, "caspal", "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPAL_x, "caspal", "'Xs, 'Xs+, 'Xt, 'Xt+") // clang-format on @@ -1898,15 +1899,15 @@ void Disassembler::VisitFPImmediate(const Instruction *instr) { switch (instr->Mask(FPImmediateMask)) { case FMOV_h_imm: mnemonic = "fmov"; - form = "'Hd, 'IFPHalf"; + form = "'Hd, 'IFP"; break; case FMOV_s_imm: mnemonic = "fmov"; - form = "'Sd, 'IFPSingle"; + form = "'Sd, 'IFP"; break; case FMOV_d_imm: mnemonic = "fmov"; - form = "'Dd, 'IFPDouble"; + form = "'Dd, 'IFP"; break; default: VIXL_UNREACHABLE(); @@ -3409,7 +3410,7 @@ void Disassembler::VisitNEONCopy(const Instruction *instr) { } else if (instr->Mask(NEONCopySmovMask) == NEON_SMOV) { mnemonic = "smov"; nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); - form = 
"'Rdq, 'Vn.%s['IVInsIndex1]"; + form = "'R30d, 'Vn.%s['IVInsIndex1]"; } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) { mnemonic = "dup"; form = "'Vd.%s, 'Vn.%s['IVInsIndex1]"; @@ -4006,19 +4007,16 @@ void Disassembler::VisitNEONModifiedImmediate(const Instruction *instr) { } } else { // cmode<0> == '1' mnemonic = "fmov"; + form = "'Vt.%s, 'IFPNeon"; if (half_enc == 1) { - form = "'Vt.%s, 'IVMIImmFPHalf"; nfd.SetFormatMap(0, &map_h); } else if (op == 0) { - form = "'Vt.%s, 'IVMIImmFPSingle"; nfd.SetFormatMap(0, &map_s); + } else if (q == 1) { + form = "'Vt.2d, 'IFPNeon"; } else { - if (q == 1) { - form = "'Vt.2d, 'IVMIImmFPDouble"; - } else { - mnemonic = "unallocated"; - form = "(NEONModifiedImmediate)"; - } + mnemonic = "unallocated"; + form = "(NEONModifiedImmediate)"; } } } @@ -4926,6 +4924,4582 @@ void Disassembler::VisitNEONPerm(const Instruction *instr) { Format(instr, mnemonic, nfd.Substitute(form)); } +void Disassembler:: + VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #1]"; + + switch (instr->Mask( + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) { + case LD1H_z_p_bz_s_x32_scaled: + mnemonic = "ld1h"; + break; + case LD1SH_z_p_bz_s_x32_scaled: + mnemonic = "ld1sh"; + break; + case LDFF1H_z_p_bz_s_x32_scaled: + mnemonic = "ldff1h"; + break; + case LDFF1SH_z_p_bz_s_x32_scaled: + mnemonic = "ldff1sh"; + break; + default: + form = "(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #2]"; + + switch ( + instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) { + case LD1W_z_p_bz_s_x32_scaled: + mnemonic = "ld1w"; + break; + case LDFF1W_z_p_bz_s_x32_scaled: + mnemonic = "ldff1w"; + break; + default: + form = "(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets( + const Instruction *instr) { + const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw]"; + + const char *mnemonic = "unimplemented"; + switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_s_x32_unscaled: + mnemonic = "ld1b"; + break; + case LD1H_z_p_bz_s_x32_unscaled: + mnemonic = "ld1h"; + break; + case LD1SB_z_p_bz_s_x32_unscaled: + mnemonic = "ld1sb"; + break; + case LD1SH_z_p_bz_s_x32_unscaled: + mnemonic = "ld1sh"; + break; + case LD1W_z_p_bz_s_x32_unscaled: + mnemonic = "ld1w"; + break; + case LDFF1B_z_p_bz_s_x32_unscaled: + mnemonic = "ldff1b"; + break; + case LDFF1H_z_p_bz_s_x32_unscaled: + mnemonic = "ldff1h"; + break; + case LDFF1SB_z_p_bz_s_x32_unscaled: + mnemonic = "ldff1sb"; + break; + case LDFF1SH_z_p_bz_s_x32_unscaled: + mnemonic = "ldff1sh"; + break; + case LDFF1W_z_p_bz_s_x32_unscaled: + mnemonic = "ldff1w"; + break; + default: + form = "(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitGatherLoad_VectorPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.s}, 'Pgl/z, ['Zn.s]"; + const char *form_imm_b = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016]"; + const char 
*form_imm_h = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*2]"; + const char *form_imm_w = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*4]"; + const char *form_imm; + + const char *mnemonic = "unimplemented"; + switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) { + case LD1B_z_p_ai_s: + mnemonic = "ld1b"; + form_imm = form_imm_b; + break; + case LD1H_z_p_ai_s: + mnemonic = "ld1h"; + form_imm = form_imm_h; + break; + case LD1SB_z_p_ai_s: + mnemonic = "ld1sb"; + form_imm = form_imm_b; + break; + case LD1SH_z_p_ai_s: + mnemonic = "ld1sh"; + form_imm = form_imm_h; + break; + case LD1W_z_p_ai_s: + mnemonic = "ld1w"; + form_imm = form_imm_w; + break; + case LDFF1B_z_p_ai_s: + mnemonic = "ldff1b"; + form_imm = form_imm_b; + break; + case LDFF1H_z_p_ai_s: + mnemonic = "ldff1h"; + form_imm = form_imm_h; + break; + case LDFF1SB_z_p_ai_s: + mnemonic = "ldff1sb"; + form_imm = form_imm_b; + break; + case LDFF1SH_z_p_ai_s: + mnemonic = "ldff1sh"; + form_imm = form_imm_h; + break; + case LDFF1W_z_p_ai_s: + mnemonic = "ldff1w"; + form_imm = form_imm_w; + break; + default: + form = "(SVE32BitGatherLoad_VectorPlusImm)"; + form_imm = form; + break; + } + if (instr->ExtractBits(20, 16) != 0) form = form_imm; + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.s, '?22:suxtw"; + const char *suffix = NULL; + + switch ( + instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) { + case PRFB_i_p_bz_s_x32_scaled: + mnemonic = "prfb"; + suffix = "]"; + break; + case PRFD_i_p_bz_s_x32_scaled: + mnemonic = "prfd"; + suffix = " #3]"; + break; + case PRFH_i_p_bz_s_x32_scaled: + mnemonic = "prfh"; + suffix = " #1]"; + break; + case PRFW_i_p_bz_s_x32_scaled: + mnemonic = "prfw"; + suffix = " #2]"; + break; + default: + form = "(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVE32BitGatherPrefetch_VectorPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = (instr->ExtractBits(20, 16) != 0) + ? 
"'prefSVEOp, 'Pgl, ['Zn.s, #'u2016]" + : "'prefSVEOp, 'Pgl, ['Zn.s]"; + + switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) { + case PRFB_i_p_ai_s: + mnemonic = "prfb"; + break; + case PRFD_i_p_ai_s: + mnemonic = "prfd"; + break; + case PRFH_i_p_ai_s: + mnemonic = "prfh"; + break; + case PRFW_i_p_ai_s: + mnemonic = "prfw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw #'u2423]"; + + switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) { + case ST1H_z_p_bz_s_x32_scaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_s_x32_scaled: + mnemonic = "st1w"; + break; + default: + form = "(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw]"; + + switch ( + instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_s_x32_unscaled: + mnemonic = "st1b"; + break; + case ST1H_z_p_bz_s_x32_unscaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_s_x32_unscaled: + mnemonic = "st1w"; + break; + default: + form = "(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.s}, 'Pgl, ['Zn.s"; + const char *suffix = NULL; + + bool is_zero = instr->ExtractBits(20, 16) == 0; + + switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) { + case ST1B_z_p_ai_s: + mnemonic = "st1b"; + suffix = is_zero ? "]" : ", #'u2016]"; + break; + case ST1H_z_p_ai_s: + mnemonic = "st1h"; + suffix = is_zero ? "]" : ", #'u2016*2]"; + break; + case ST1W_z_p_ai_s: + mnemonic = "st1w"; + suffix = is_zero ? 
"]" : ", #'u2016*4]"; + break; + default: + form = "(SVE32BitScatterStore_VectorPlusImm)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw #'u2423]"; + + switch (instr->Mask( + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) { + case LD1D_z_p_bz_d_x32_scaled: + mnemonic = "ld1d"; + break; + case LD1H_z_p_bz_d_x32_scaled: + mnemonic = "ld1h"; + break; + case LD1SH_z_p_bz_d_x32_scaled: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_bz_d_x32_scaled: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_bz_d_x32_scaled: + mnemonic = "ld1w"; + break; + case LDFF1D_z_p_bz_d_x32_scaled: + mnemonic = "ldff1d"; + break; + case LDFF1H_z_p_bz_d_x32_scaled: + mnemonic = "ldff1h"; + break; + case LDFF1SH_z_p_bz_d_x32_scaled: + mnemonic = "ldff1sh"; + break; + case LDFF1SW_z_p_bz_d_x32_scaled: + mnemonic = "ldff1sw"; + break; + case LDFF1W_z_p_bz_d_x32_scaled: + mnemonic = "ldff1w"; + break; + default: + form = "(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, lsl #'u2423]"; + + switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) { + case LD1D_z_p_bz_d_64_scaled: + mnemonic = "ld1d"; + break; + case LD1H_z_p_bz_d_64_scaled: + mnemonic = "ld1h"; + break; + case LD1SH_z_p_bz_d_64_scaled: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_bz_d_64_scaled: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_bz_d_64_scaled: + mnemonic = "ld1w"; + break; + case LDFF1D_z_p_bz_d_64_scaled: + mnemonic = "ldff1d"; + break; + case LDFF1H_z_p_bz_d_64_scaled: + mnemonic = "ldff1h"; + break; + case LDFF1SH_z_p_bz_d_64_scaled: + mnemonic = "ldff1sh"; + break; + case LDFF1SW_z_p_bz_d_64_scaled: + mnemonic = "ldff1sw"; + break; + case LDFF1W_z_p_bz_d_64_scaled: + mnemonic = "ldff1w"; + break; + default: + form = "(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d]"; + + switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_d_64_unscaled: + mnemonic = "ld1b"; + break; + case LD1D_z_p_bz_d_64_unscaled: + mnemonic = "ld1d"; + break; + case LD1H_z_p_bz_d_64_unscaled: + mnemonic = "ld1h"; + break; + case LD1SB_z_p_bz_d_64_unscaled: + mnemonic = "ld1sb"; + break; + case LD1SH_z_p_bz_d_64_unscaled: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_bz_d_64_unscaled: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_bz_d_64_unscaled: + mnemonic = "ld1w"; + break; + case LDFF1B_z_p_bz_d_64_unscaled: + mnemonic = "ldff1b"; + break; + case LDFF1D_z_p_bz_d_64_unscaled: + mnemonic = "ldff1d"; + break; + case LDFF1H_z_p_bz_d_64_unscaled: + mnemonic = "ldff1h"; + break; + case LDFF1SB_z_p_bz_d_64_unscaled: + mnemonic = "ldff1sb"; + break; + case LDFF1SH_z_p_bz_d_64_unscaled: + mnemonic = "ldff1sh"; + break; + case LDFF1SW_z_p_bz_d_64_unscaled: + mnemonic = "ldff1sw"; + break; + case LDFF1W_z_p_bz_d_64_unscaled: + mnemonic = "ldff1w"; + break; 
+ default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler:: + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw]"; + + switch (instr->Mask( + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_d_x32_unscaled: + mnemonic = "ld1b"; + break; + case LD1D_z_p_bz_d_x32_unscaled: + mnemonic = "ld1d"; + break; + case LD1H_z_p_bz_d_x32_unscaled: + mnemonic = "ld1h"; + break; + case LD1SB_z_p_bz_d_x32_unscaled: + mnemonic = "ld1sb"; + break; + case LD1SH_z_p_bz_d_x32_unscaled: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_bz_d_x32_unscaled: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_bz_d_x32_unscaled: + mnemonic = "ld1w"; + break; + case LDFF1B_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1b"; + break; + case LDFF1D_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1d"; + break; + case LDFF1H_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1h"; + break; + case LDFF1SB_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1sb"; + break; + case LDFF1SH_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1sh"; + break; + case LDFF1SW_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1sw"; + break; + case LDFF1W_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1w"; + break; + default: + form = "(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitGatherLoad_VectorPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.d}, 'Pgl/z, ['Zn.d]"; + const char *form_imm[4] = {"{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016]", + "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*2]", + "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*4]", + "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*8]"}; + + if (instr->ExtractBits(20, 16) != 0) { + unsigned msz = instr->ExtractBits(24, 23); + bool sign_extend = instr->ExtractBit(14) == 0; + if ((msz == kDRegSizeInBytesLog2) && sign_extend) { + form = "(SVE64BitGatherLoad_VectorPlusImm)"; + } else { + VIXL_ASSERT(msz < ArrayLength(form_imm)); + form = form_imm[msz]; + } + } + + const char *mnemonic = "unimplemented"; + switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) { + case LD1B_z_p_ai_d: + mnemonic = "ld1b"; + break; + case LD1D_z_p_ai_d: + mnemonic = "ld1d"; + break; + case LD1H_z_p_ai_d: + mnemonic = "ld1h"; + break; + case LD1SB_z_p_ai_d: + mnemonic = "ld1sb"; + break; + case LD1SH_z_p_ai_d: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_ai_d: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_ai_d: + mnemonic = "ld1w"; + break; + case LDFF1B_z_p_ai_d: + mnemonic = "ldff1b"; + break; + case LDFF1D_z_p_ai_d: + mnemonic = "ldff1d"; + break; + case LDFF1H_z_p_ai_d: + mnemonic = "ldff1h"; + break; + case LDFF1SB_z_p_ai_d: + mnemonic = "ldff1sb"; + break; + case LDFF1SH_z_p_ai_d: + mnemonic = "ldff1sh"; + break; + case LDFF1SW_z_p_ai_d: + mnemonic = "ldff1sw"; + break; + case LDFF1W_z_p_ai_d: + mnemonic = "ldff1w"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets)"; + + switch ( + instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) { + case PRFB_i_p_bz_d_64_scaled: + mnemonic = "prfb"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d]"; + break; + case PRFD_i_p_bz_d_64_scaled: + mnemonic = "prfd"; + form = "'prefSVEOp, 
'Pgl, ['Xns, 'Zm.d, lsl #3]"; + break; + case PRFH_i_p_bz_d_64_scaled: + mnemonic = "prfh"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #1]"; + break; + case PRFW_i_p_bz_d_64_scaled: + mnemonic = "prfw"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #2]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler:: + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, '?22:suxtw"; + const char *suffix = NULL; + + switch (instr->Mask( + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) { + case PRFB_i_p_bz_d_x32_scaled: + mnemonic = "prfb"; + suffix = " ]"; + break; + case PRFD_i_p_bz_d_x32_scaled: + mnemonic = "prfd"; + suffix = " #3]"; + break; + case PRFH_i_p_bz_d_x32_scaled: + mnemonic = "prfh"; + suffix = " #1]"; + break; + case PRFW_i_p_bz_d_x32_scaled: + mnemonic = "prfw"; + suffix = " #2]"; + break; + default: + form = "(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVE64BitGatherPrefetch_VectorPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = (instr->ExtractBits(20, 16) != 0) + ? "'prefSVEOp, 'Pgl, ['Zn.d, #'u2016]" + : "'prefSVEOp, 'Pgl, ['Zn.d]"; + + switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) { + case PRFB_i_p_ai_d: + mnemonic = "prfb"; + break; + case PRFD_i_p_ai_d: + mnemonic = "prfd"; + break; + case PRFH_i_p_ai_d: + mnemonic = "prfh"; + break; + case PRFW_i_p_ai_d: + mnemonic = "prfw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, lsl #'u2423]"; + + switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) { + case ST1D_z_p_bz_d_64_scaled: + mnemonic = "st1d"; + break; + case ST1H_z_p_bz_d_64_scaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_d_64_scaled: + mnemonic = "st1w"; + break; + default: + form = "(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d]"; + + switch ( + instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_d_64_unscaled: + mnemonic = "st1b"; + break; + case ST1D_z_p_bz_d_64_unscaled: + mnemonic = "st1d"; + break; + case ST1H_z_p_bz_d_64_unscaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_d_64_unscaled: + mnemonic = "st1w"; + break; + default: + form = "(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffset)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler:: + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw #'u2423]"; + + switch (instr->Mask( + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) { + case ST1D_z_p_bz_d_x32_scaled: + mnemonic = "st1d"; + break; + case ST1H_z_p_bz_d_x32_scaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_d_x32_scaled: + mnemonic = "st1w"; + break; + 
default: + form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler:: + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw]"; + + switch (instr->Mask( + SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_d_x32_unscaled: + mnemonic = "st1b"; + break; + case ST1D_z_p_bz_d_x32_unscaled: + mnemonic = "st1d"; + break; + case ST1H_z_p_bz_d_x32_unscaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_d_x32_unscaled: + mnemonic = "st1w"; + break; + default: + form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitScatterStore_VectorPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl, ['Zn.d"; + const char *suffix = NULL; + + bool is_zero = instr->ExtractBits(20, 16) == 0; + + switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) { + case ST1B_z_p_ai_d: + mnemonic = "st1b"; + suffix = is_zero ? "]" : ", #'u2016]"; + break; + case ST1D_z_p_ai_d: + mnemonic = "st1d"; + suffix = is_zero ? "]" : ", #'u2016*8]"; + break; + case ST1H_z_p_ai_d: + mnemonic = "st1h"; + suffix = is_zero ? "]" : ", #'u2016*2]"; + break; + case ST1W_z_p_ai_d: + mnemonic = "st1w"; + suffix = is_zero ? "]" : ", #'u2016*4]"; + break; + default: + form = "(SVE64BitScatterStore_VectorPlusImm)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEBitwiseLogicalWithImm_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'tl, 'Zd.'tl, 'ITriSvel"; + + if (instr->GetSVEImmLogical() == 0) { + // The immediate encoded in the instruction is not in the expected format. 
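// Annotation, not part of this commit: "expected format" means the A64/SVE
// bitmask-immediate encoding, which can only represent a value built by
// replicating a 2-, 4-, 8-, 16-, 32- or 64-bit element across the register,
// where the element is a rotated run of contiguous set bits. A sketch of the
// replication half of that test:
//
//   bool IsReplicatedPattern(uint64_t value, unsigned element_bits) {
//     // Rotate right by one element; periodic values are unchanged.
//     // Valid for element_bits in {2, 4, 8, 16, 32}.
//     uint64_t rot = (value >> element_bits) | (value << (64 - element_bits));
//     return rot == value;
//   }
//
// 0x0f0f0f0f0f0f0f0f repeats every 8 bits and each element is a run of set
// bits, so it encodes; 0x0123456789abcdef is encodable at no element size,
// and an instruction carrying an invalid field combination decodes to 0 here.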
+ Format(instr, "unallocated", "(SVEBitwiseImm)"); + return; + } + + switch (instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) { + case AND_z_zi: + mnemonic = "and"; + break; + case EOR_z_zi: + mnemonic = "eor"; + break; + case ORR_z_zi: + mnemonic = "orr"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBitwiseLogical_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) { + case AND_z_p_zz: + mnemonic = "and"; + break; + case BIC_z_p_zz: + mnemonic = "bic"; + break; + case EOR_z_p_zz: + mnemonic = "eor"; + break; + case ORR_z_p_zz: + mnemonic = "orr"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBitwiseShiftByImm_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'tszp, 'Pgl/m, 'Zd.'tszp, 'ITriSveq"; + unsigned tsize = (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(9, 8); + + if (tsize == 0) { + form = "(SVEBitwiseShiftByImm_Predicated)"; + } else { + switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) { + case ASRD_z_p_zi: + mnemonic = "asrd"; + break; + case ASR_z_p_zi: + mnemonic = "asr"; + break; + case LSL_z_p_zi: + mnemonic = "lsl"; + form = "'Zd.'tszp, p'u1210/m, 'Zd.'tszp, 'ITriSvep"; + break; + case LSR_z_p_zi: + mnemonic = "lsr"; + break; + default: + break; + } + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBitwiseShiftByVector_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) { + case ASRR_z_p_zz: + mnemonic = "asrr"; + break; + case ASR_z_p_zz: + mnemonic = "asr"; + break; + case LSLR_z_p_zz: + mnemonic = "lslr"; + break; + case LSL_z_p_zz: + mnemonic = "lsl"; + break; + case LSRR_z_p_zz: + mnemonic = "lsrr"; + break; + case LSR_z_p_zz: + mnemonic = "lsr"; + break; + default: + form = "(SVEBitwiseShiftByVector_Predicated)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBitwiseShiftByWideElements_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.d"; + + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "(SVEBitwiseShiftByWideElements_Predicated)"; + } else { + switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) { + case ASR_z_p_zw: + mnemonic = "asr"; + break; + case LSL_z_p_zw: + mnemonic = "lsl"; + break; + case LSR_z_p_zw: + mnemonic = "lsr"; + break; + default: + form = "(SVEBitwiseShiftByWideElements_Predicated)"; + break; + } + } + Format(instr, mnemonic, form); +} + +static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) { + VIXL_ASSERT(IsUintN(8 << lane_bytes_log2, value)); + + // Duplicate lane-sized value across double word. + switch (lane_bytes_log2) { + case 0: + value *= 0x0101010101010101; + break; + case 1: + value *= 0x0001000100010001; + break; + case 2: + value *= 0x0000000100000001; + break; + case 3: // Nothing to do + break; + default: + VIXL_UNREACHABLE(); + } + + if ((value & 0xff) == 0) { + // Check for 16-bit patterns. Set least-significant 16 bits, to make tests + // easier; we already checked least-significant byte is zero above. 
+ uint64_t generic_value = value | 0xffff; + + // Check 0x00000000_0000pq00 or 0xffffffff_ffffpq00. + if ((generic_value == 0xffff) || (generic_value == UINT64_MAX)) { + return false; + } + + // Check 0x0000pq00_0000pq00 or 0xffffpq00_ffffpq00. + uint64_t rotvalue = RotateRight(value, 32, 64); + if (value == rotvalue) { + generic_value &= 0xffffffff; + if ((generic_value == 0xffff) || (generic_value == UINT32_MAX)) { + return false; + } + } + + // Check 0xpq00pq00_pq00pq00. + rotvalue = RotateRight(value, 16, 64); + if (value == rotvalue) { + return false; + } + } else { + // Check for 8-bit patterns. Set least-significant byte, to make tests + // easier. + uint64_t generic_value = value | 0xff; + + // Check 0x00000000_000000pq or 0xffffffff_ffffffpq. + if ((generic_value == 0xff) || (generic_value == UINT64_MAX)) { + return false; + } + + // Check 0x000000pq_000000pq or 0xffffffpq_ffffffpq. + uint64_t rotvalue = RotateRight(value, 32, 64); + if (value == rotvalue) { + generic_value &= 0xffffffff; + if ((generic_value == 0xff) || (generic_value == UINT32_MAX)) { + return false; + } + } + + // Check 0x00pq00pq_00pq00pq or 0xffpqffpq_ffpqffpq. + rotvalue = RotateRight(value, 16, 64); + if (value == rotvalue) { + generic_value &= 0xffff; + if ((generic_value == 0xff) || (generic_value == UINT16_MAX)) { + return false; + } + } + + // Check 0xpqpqpqpq_pqpqpqpq. + rotvalue = RotateRight(value, 8, 64); + if (value == rotvalue) { + return false; + } + } + return true; +} + +void Disassembler::VisitSVEBroadcastBitmaskImm(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastBitmaskImm)"; + + switch (instr->Mask(SVEBroadcastBitmaskImmMask)) { + case DUPM_z_i: { + uint64_t imm = instr->GetSVEImmLogical(); + if (imm != 0) { + int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2(); + mnemonic = SVEMoveMaskPreferred(imm, lane_size) ? "mov" : "dupm"; + form = "'Zd.'tl, 'ITriSvel"; + } + break; + } + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastFPImm_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastFPImm_Unpredicated)"; + + switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) { + case FDUP_z_i: + // The preferred disassembly for fdup is "fmov". + mnemonic = "fmov"; + form = "'Zd.'t, 'IFPSve"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastGeneralRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastGeneralRegister)"; + + switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) { + case DUP_z_r: + // The preferred disassembly for dup is "mov". + mnemonic = "mov"; + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Zd.'t, 'Xns"; + } else { + form = "'Zd.'t, 'Wns"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastIndexElement(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastIndexElement)"; + + switch (instr->Mask(SVEBroadcastIndexElementMask)) { + case DUP_z_zi: { + // The tsz field must not be zero. + int tsz = instr->ExtractBits(20, 16); + if (tsz != 0) { + // The preferred disassembly for dup is "mov". + mnemonic = "mov"; + int imm2 = instr->ExtractBits(23, 22); + if ((CountSetBits(imm2) + CountSetBits(tsz)) == 1) { + // If imm2:tsz has one set bit, the index is zero. 
This is + // disassembled as a mov from a b/h/s/d/q scalar register. + form = "'Zd.'tszx, 'tszx'u0905"; + } else { + form = "'Zd.'tszx, 'Zn.'tszx['IVInsSVEIndex]"; + } + } + break; + } + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastIntImm_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastIntImm_Unpredicated)"; + + switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) { + case DUP_z_i: + // The encoding of byte-sized lanes with lsl #8 is undefined. + if ((instr->GetSVEVectorFormat() == kFormatVnB) && + (instr->ExtractBit(13) == 1)) + break; + + // The preferred disassembly for dup is "mov". + mnemonic = "mov"; + form = (instr->ExtractBit(13) == 0) ? "'Zd.'t, #'s1205" + : "'Zd.'t, #'s1205, lsl #8"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVECompressActiveElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECompressActiveElements)"; + + switch (instr->Mask(SVECompressActiveElementsMask)) { + case COMPACT_z_p_z: + // The top bit of size is always set for compact, so 't can only be + // substituted with types S and D. + VIXL_ASSERT(instr->ExtractBit(23) == 1); + mnemonic = "compact"; + form = "'Zd.'t, 'Pgl, 'Zn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEConditionallyBroadcastElementToVector( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) { + case CLASTA_z_p_zz: + mnemonic = "clasta"; + break; + case CLASTB_z_p_zz: + mnemonic = "clastb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEConditionallyExtractElementToGeneralRegister( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Wd, 'Pgl, 'Wd, 'Zn.'t"; + + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Xd, p'u1210, 'Xd, 'Zn.'t"; + } + + switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) { + case CLASTA_r_p_z: + mnemonic = "clasta"; + break; + case CLASTB_r_p_z: + mnemonic = "clastb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEConditionallyExtractElementToSIMDFPScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"; + + switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) { + case CLASTA_v_p_z: + mnemonic = "clasta"; + break; + case CLASTB_v_p_z: + mnemonic = "clastb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEConditionallyTerminateScalars( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = (instr->ExtractBit(22) == 0) ? 
"'Wn, 'Wm" : "'Xn, 'Xm"; + + switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) { + case CTERMEQ_rr: + mnemonic = "ctermeq"; + break; + case CTERMNE_rr: + mnemonic = "ctermne"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEConstructivePrefix_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEConstructivePrefix_Unpredicated)"; + + switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) { + case MOVPRFX_z_z: + mnemonic = "movprfx"; + form = "'Zd, 'Zn"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + + bool rm_is_zr = instr->GetRm() == kZeroRegCode; + + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; + const char *suffix = NULL; + + switch (instr->Mask(SVEContiguousFirstFaultLoad_ScalarPlusScalarMask)) { + case LDFF1B_z_p_br_u16: + case LDFF1B_z_p_br_u32: + case LDFF1B_z_p_br_u64: + case LDFF1B_z_p_br_u8: + mnemonic = "ldff1b"; + suffix = rm_is_zr ? "]" : ", 'Xm]"; + break; + case LDFF1D_z_p_br_u64: + mnemonic = "ldff1d"; + suffix = rm_is_zr ? "]" : ", 'Xm, lsl #3]"; + break; + case LDFF1H_z_p_br_u16: + case LDFF1H_z_p_br_u32: + case LDFF1H_z_p_br_u64: + mnemonic = "ldff1h"; + suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]"; + break; + case LDFF1SB_z_p_br_s16: + case LDFF1SB_z_p_br_s32: + case LDFF1SB_z_p_br_s64: + mnemonic = "ldff1sb"; + suffix = rm_is_zr ? "]" : ", 'Xm]"; + break; + case LDFF1SH_z_p_br_s32: + case LDFF1SH_z_p_br_s64: + mnemonic = "ldff1sh"; + suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]"; + break; + case LDFF1SW_z_p_br_s64: + mnemonic = "ldff1sw"; + suffix = rm_is_zr ? "]" : ", 'Xm, lsl #2]"; + break; + case LDFF1W_z_p_br_u32: + case LDFF1W_z_p_br_u64: + mnemonic = "ldff1w"; + suffix = rm_is_zr ? "]" : ", 'Xm, lsl #2]"; + break; + default: + form = "(SVEContiguousFirstFaultLoad_ScalarPlusScalar)"; + break; + } + + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonFaultLoad_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]"; + + switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) { + case LDNF1B_z_p_bi_u16: + case LDNF1B_z_p_bi_u32: + case LDNF1B_z_p_bi_u64: + case LDNF1B_z_p_bi_u8: + mnemonic = "ldnf1b"; + break; + case LDNF1D_z_p_bi_u64: + mnemonic = "ldnf1d"; + break; + case LDNF1H_z_p_bi_u16: + case LDNF1H_z_p_bi_u32: + case LDNF1H_z_p_bi_u64: + mnemonic = "ldnf1h"; + break; + case LDNF1SB_z_p_bi_s16: + case LDNF1SB_z_p_bi_s32: + case LDNF1SB_z_p_bi_s64: + mnemonic = "ldnf1sb"; + break; + case LDNF1SH_z_p_bi_s32: + case LDNF1SH_z_p_bi_s64: + mnemonic = "ldnf1sh"; + break; + case LDNF1SW_z_p_bi_s64: + mnemonic = "ldnf1sw"; + break; + case LDNF1W_z_p_bi_u32: + case LDNF1W_z_p_bi_u64: + mnemonic = "ldnf1w"; + break; + default: + form = "(SVEContiguousNonFaultLoad_ScalarPlusImm)"; + suffix = NULL; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusImm)"; + + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? 
"]" : ", #'s1916, mul vl]"; + switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) { + case LDNT1B_z_p_bi_contiguous: + mnemonic = "ldnt1b"; + form = "{'Zt.b}, 'Pgl/z, ['Xns"; + break; + case LDNT1D_z_p_bi_contiguous: + mnemonic = "ldnt1d"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + break; + case LDNT1H_z_p_bi_contiguous: + mnemonic = "ldnt1h"; + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + break; + case LDNT1W_z_p_bi_contiguous: + mnemonic = "ldnt1w"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + break; + default: + suffix = NULL; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusScalar)"; + + switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) { + case LDNT1B_z_p_br_contiguous: + mnemonic = "ldnt1b"; + form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]"; + break; + case LDNT1D_z_p_br_contiguous: + mnemonic = "ldnt1d"; + form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]"; + break; + case LDNT1H_z_p_br_contiguous: + mnemonic = "ldnt1h"; + form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]"; + break; + case LDNT1W_z_p_br_contiguous: + mnemonic = "ldnt1w"; + form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousNonTemporalStore_ScalarPlusImm)"; + + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]"; + switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) { + case STNT1B_z_p_bi_contiguous: + mnemonic = "stnt1b"; + form = "{'Zt.b}, 'Pgl, ['Xns"; + break; + case STNT1D_z_p_bi_contiguous: + mnemonic = "stnt1d"; + form = "{'Zt.d}, 'Pgl, ['Xns"; + break; + case STNT1H_z_p_bi_contiguous: + mnemonic = "stnt1h"; + form = "{'Zt.h}, 'Pgl, ['Xns"; + break; + case STNT1W_z_p_bi_contiguous: + mnemonic = "stnt1w"; + form = "{'Zt.s}, 'Pgl, ['Xns"; + break; + default: + suffix = NULL; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousNonTemporalStore_ScalarPlusScalar)"; + + switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) { + case STNT1B_z_p_br_contiguous: + mnemonic = "stnt1b"; + form = "{'Zt.b}, 'Pgl, ['Xns, 'Rm]"; + break; + case STNT1D_z_p_br_contiguous: + mnemonic = "stnt1d"; + form = "{'Zt.d}, 'Pgl, ['Xns, 'Rm, lsl #3]"; + break; + case STNT1H_z_p_br_contiguous: + mnemonic = "stnt1h"; + form = "{'Zt.h}, 'Pgl, ['Xns, 'Rm, lsl #1]"; + break; + case STNT1W_z_p_br_contiguous: + mnemonic = "stnt1w"; + form = "{'Zt.s}, 'Pgl, ['Xns, 'Rm, lsl #2]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = (instr->ExtractBits(21, 16) != 0) + ? 
"'prefSVEOp, 'Pgl, ['Xns, #'s2116, mul vl]" + : "'prefSVEOp, 'Pgl, ['Xns]"; + + switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) { + case PRFB_i_p_bi_s: + mnemonic = "prfb"; + break; + case PRFD_i_p_bi_s: + mnemonic = "prfd"; + break; + case PRFH_i_p_bi_s: + mnemonic = "prfh"; + break; + case PRFW_i_p_bi_s: + mnemonic = "prfw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousPrefetch_ScalarPlusScalar)"; + + if (instr->GetRm() != kZeroRegCode) { + switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) { + case PRFB_i_p_br_s: + mnemonic = "prfb"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm]"; + break; + case PRFD_i_p_br_s: + mnemonic = "prfd"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #3]"; + break; + case PRFH_i_p_br_s: + mnemonic = "prfh"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #1]"; + break; + case PRFW_i_p_br_s: + mnemonic = "prfw"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #2]"; + break; + default: + break; + } + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousStore_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + + // The 'size' field isn't in the usual place here. + const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, #'s1916, mul vl]"; + if (instr->ExtractBits(19, 16) == 0) { + form = "{'Zt.'tls}, 'Pgl, ['Xns]"; + } + + switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) { + case ST1B_z_p_bi: + mnemonic = "st1b"; + break; + case ST1D_z_p_bi: + mnemonic = "st1d"; + break; + case ST1H_z_p_bi: + mnemonic = "st1h"; + break; + case ST1W_z_p_bi: + mnemonic = "st1w"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousStore_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + + // The 'size' field isn't in the usual place here. + const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, 'Xm'NSveS]"; + + switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) { + case ST1B_z_p_br: + mnemonic = "st1b"; + break; + case ST1D_z_p_br: + mnemonic = "st1d"; + break; + case ST1H_z_p_br: + mnemonic = "st1h"; + break; + case ST1W_z_p_br: + mnemonic = "st1w"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVECopyFPImm_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopyFPImm_Predicated)"; + + switch (instr->Mask(SVECopyFPImm_PredicatedMask)) { + case FCPY_z_p_i: + // The preferred disassembly for fcpy is "fmov". + mnemonic = "fmov"; + form = "'Zd.'t, 'Pm/m, 'IFPSve"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVECopyGeneralRegisterToVector_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopyGeneralRegisterToVector_Predicated)"; + + switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) { + case CPY_z_p_r: + // The preferred disassembly for cpy is "mov". 
+ mnemonic = "mov"; + form = "'Zd.'t, 'Pgl/m, 'Wns"; + if (instr->GetSVESize() == kXRegSizeInBytesLog2) { + form = "'Zd.'t, 'Pgl/m, 'Xns"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVECopyIntImm_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopyIntImm_Predicated)"; + const char *suffix = NULL; + + switch (instr->Mask(SVECopyIntImm_PredicatedMask)) { + case CPY_z_p_i: { + // The preferred disassembly for cpy is "mov". + mnemonic = "mov"; + form = "'Zd.'t, 'Pm/'?14:mz, #'s1205"; + if (instr->ExtractBit(13) != 0) suffix = ", lsl #8"; + break; + } + default: + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVECopySIMDFPScalarRegisterToVector_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopySIMDFPScalarRegisterToVector_Predicated)"; + + switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) { + case CPY_z_p_v: + // The preferred disassembly for cpy is "mov". + mnemonic = "mov"; + form = "'Zd.'t, 'Pgl/m, 'Vnv"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEExtractElementToGeneralRegister( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Wd, 'Pgl, 'Zn.'t"; + + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Xd, p'u1210, 'Zn.'t"; + } + + switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) { + case LASTA_r_p_z: + mnemonic = "lasta"; + break; + case LASTB_r_p_z: + mnemonic = "lastb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEExtractElementToSIMDFPScalarRegister( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'t'u0400, 'Pgl, 'Zn.'t"; + + switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) { + case LASTA_v_p_z: + mnemonic = "lasta"; + break; + case LASTB_v_p_z: + mnemonic = "lastb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFFRInitialise(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFFRInitialise)"; + + switch (instr->Mask(SVEFFRInitialiseMask)) { + case SETFFR_f: + mnemonic = "setffr"; + form = " "; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFFRWriteFromPredicate(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFFRWriteFromPredicate)"; + + switch (instr->Mask(SVEFFRWriteFromPredicateMask)) { + case WRFFR_f_p: + mnemonic = "wrffr"; + form = "'Pn.b"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPArithmeticWithImm_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form00 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.0"; + const char *form05 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.5"; + const char *form10 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #1.0"; + const char *form20 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #2.0"; + int i1 = instr->ExtractBit(5); + const char *form = i1 ? form10 : form00; + + switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) { + case FADD_z_p_zs: + mnemonic = "fadd"; + form = i1 ? 
form10 : form05; + break; + case FMAXNM_z_p_zs: + mnemonic = "fmaxnm"; + break; + case FMAX_z_p_zs: + mnemonic = "fmax"; + break; + case FMINNM_z_p_zs: + mnemonic = "fminnm"; + break; + case FMIN_z_p_zs: + mnemonic = "fmin"; + break; + case FMUL_z_p_zs: + mnemonic = "fmul"; + form = i1 ? form20 : form05; + break; + case FSUBR_z_p_zs: + mnemonic = "fsubr"; + form = i1 ? form10 : form05; + break; + case FSUB_z_p_zs: + mnemonic = "fsub"; + form = i1 ? form10 : form05; + break; + default: + form = "(SVEFPArithmeticWithImm_Predicated)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPArithmetic_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) { + case FABD_z_p_zz: + mnemonic = "fabd"; + break; + case FADD_z_p_zz: + mnemonic = "fadd"; + break; + case FDIVR_z_p_zz: + mnemonic = "fdivr"; + break; + case FDIV_z_p_zz: + mnemonic = "fdiv"; + break; + case FMAXNM_z_p_zz: + mnemonic = "fmaxnm"; + break; + case FMAX_z_p_zz: + mnemonic = "fmax"; + break; + case FMINNM_z_p_zz: + mnemonic = "fminnm"; + break; + case FMIN_z_p_zz: + mnemonic = "fmin"; + break; + case FMULX_z_p_zz: + mnemonic = "fmulx"; + break; + case FMUL_z_p_zz: + mnemonic = "fmul"; + break; + case FSCALE_z_p_zz: + mnemonic = "fscale"; + break; + case FSUBR_z_p_zz: + mnemonic = "fsubr"; + break; + case FSUB_z_p_zz: + mnemonic = "fsub"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPConvertPrecision(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPConvertPrecision)"; + + switch (instr->Mask(SVEFPConvertPrecisionMask)) { + case FCVT_z_p_z_d2h: + mnemonic = "fcvt"; + form = "'Zd.h, 'Pgl/m, 'Zn.d"; + break; + case FCVT_z_p_z_d2s: + mnemonic = "fcvt"; + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + case FCVT_z_p_z_h2d: + mnemonic = "fcvt"; + form = "'Zd.d, 'Pgl/m, 'Zn.h"; + break; + case FCVT_z_p_z_h2s: + mnemonic = "fcvt"; + form = "'Zd.s, 'Pgl/m, 'Zn.h"; + break; + case FCVT_z_p_z_s2d: + mnemonic = "fcvt"; + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + case FCVT_z_p_z_s2h: + mnemonic = "fcvt"; + form = "'Zd.h, 'Pgl/m, 'Zn.s"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPConvertToInt(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPConvertToInt)"; + + switch (instr->Mask(SVEFPConvertToIntMask)) { + case FCVTZS_z_p_z_d2w: + mnemonic = "fcvtzs"; + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + case FCVTZS_z_p_z_d2x: + mnemonic = "fcvtzs"; + form = "'Zd.d, 'Pgl/m, 'Zn.d"; + break; + case FCVTZS_z_p_z_fp162h: + mnemonic = "fcvtzs"; + form = "'Zd.h, 'Pgl/m, 'Zn.h"; + break; + case FCVTZS_z_p_z_fp162w: + mnemonic = "fcvtzs"; + form = "'Zd.s, 'Pgl/m, 'Zn.h"; + break; + case FCVTZS_z_p_z_fp162x: + mnemonic = "fcvtzs"; + form = "'Zd.d, 'Pgl/m, 'Zn.h"; + break; + case FCVTZS_z_p_z_s2w: + mnemonic = "fcvtzs"; + form = "'Zd.s, 'Pgl/m, 'Zn.s"; + break; + case FCVTZS_z_p_z_s2x: + mnemonic = "fcvtzs"; + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + case FCVTZU_z_p_z_d2w: + mnemonic = "fcvtzu"; + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + case FCVTZU_z_p_z_d2x: + mnemonic = "fcvtzu"; + form = "'Zd.d, 'Pgl/m, 'Zn.d"; + break; + case FCVTZU_z_p_z_fp162h: + mnemonic = "fcvtzu"; + form = "'Zd.h, 'Pgl/m, 'Zn.h"; + break; + case FCVTZU_z_p_z_fp162w: + mnemonic = "fcvtzu"; + form = "'Zd.s, 'Pgl/m, 'Zn.h"; 
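// Annotation, not part of this commit: the encoding names follow a
// <source>2<destination> convention: _d2w is double to 32-bit word, _s2x is
// single to 64-bit (x) integer, and _fp162h/_fp162w/_fp162x are half
// precision to 16-, 32- and 64-bit integers, which is why this fp162w case
// pairs a .s destination with a .h source.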
+ break; + case FCVTZU_z_p_z_fp162x: + mnemonic = "fcvtzu"; + form = "'Zd.d, 'Pgl/m, 'Zn.h"; + break; + case FCVTZU_z_p_z_s2w: + mnemonic = "fcvtzu"; + form = "'Zd.s, 'Pgl/m, 'Zn.s"; + break; + case FCVTZU_z_p_z_s2x: + mnemonic = "fcvtzu"; + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPExponentialAccelerator(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPExponentialAccelerator)"; + + unsigned size = instr->GetSVESize(); + switch (instr->Mask(SVEFPExponentialAcceleratorMask)) { + case FEXPA_z_z: + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + mnemonic = "fexpa"; + form = "'Zd.'t, 'Zn.'t"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPRoundToIntegralValue(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; + + switch (instr->Mask(SVEFPRoundToIntegralValueMask)) { + case FRINTA_z_p_z: + mnemonic = "frinta"; + break; + case FRINTI_z_p_z: + mnemonic = "frinti"; + break; + case FRINTM_z_p_z: + mnemonic = "frintm"; + break; + case FRINTN_z_p_z: + mnemonic = "frintn"; + break; + case FRINTP_z_p_z: + mnemonic = "frintp"; + break; + case FRINTX_z_p_z: + mnemonic = "frintx"; + break; + case FRINTZ_z_p_z: + mnemonic = "frintz"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPTrigMulAddCoefficient(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPTrigMulAddCoefficient)"; + + unsigned size = instr->GetSVESize(); + switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) { + case FTMAD_z_zzi: + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + mnemonic = "ftmad"; + form = "'Zd.'t, 'Zd.'t, 'Zn.'t, #'u1816"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPTrigSelectCoefficient(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPTrigSelectCoefficient)"; + + unsigned size = instr->GetSVESize(); + switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) { + case FTSSEL_z_zz: + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + mnemonic = "ftssel"; + form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPUnaryOp(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; + + if (instr->GetSVESize() == kBRegSizeInBytesLog2) { + form = "(SVEFPUnaryOp)"; + } else { + switch (instr->Mask(SVEFPUnaryOpMask)) { + case FRECPX_z_p_z: + mnemonic = "frecpx"; + break; + case FSQRT_z_p_z: + mnemonic = "fsqrt"; + break; + default: + form = "(SVEFPUnaryOp)"; + break; + } + } + Format(instr, mnemonic, form); +} + +static const char *IncDecFormHelper(const Instruction *instr, + const char *reg_pat_mul_form, + const char *reg_pat_form, + const char *reg_form) { + if (instr->ExtractBits(19, 16) == 0) { + if (instr->ExtractBits(9, 5) == SVE_ALL) { + // Use the register only form if the multiplier is one (encoded as zero) + // and the pattern is SVE_ALL. + return reg_form; + } + // Use the register and pattern form if the multiplier is one. 
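// Annotation, not part of this commit: the three forms map onto the
// assembler's optional operands, e.g. for incd: "incd x0" (pattern all,
// multiplier 1), "incd x0, vl4" (explicit pattern, multiplier 1) and
// "incd x0, vl4, mul #2". Bits 19:16 hold the multiplier minus one, hence
// the "mul #'u1916+1" substitution in the callers below.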
+ return reg_pat_form; + } + return reg_pat_mul_form; +} + +void Disassembler::VisitSVEIncDecRegisterByElementCount( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = + IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd"); + + switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) { + case DECB_r_rs: + mnemonic = "decb"; + break; + case DECD_r_rs: + mnemonic = "decd"; + break; + case DECH_r_rs: + mnemonic = "dech"; + break; + case DECW_r_rs: + mnemonic = "decw"; + break; + case INCB_r_rs: + mnemonic = "incb"; + break; + case INCD_r_rs: + mnemonic = "incd"; + break; + case INCH_r_rs: + mnemonic = "inch"; + break; + case INCW_r_rs: + mnemonic = "incw"; + break; + default: + form = "(SVEIncDecRegisterByElementCount)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIncDecVectorByElementCount( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = IncDecFormHelper(instr, + "'Zd.'t, 'Ipc, mul #'u1916+1", + "'Zd.'t, 'Ipc", + "'Zd.'t"); + + switch (instr->Mask(SVEIncDecVectorByElementCountMask)) { + case DECD_z_zs: + mnemonic = "decd"; + break; + case DECH_z_zs: + mnemonic = "dech"; + break; + case DECW_z_zs: + mnemonic = "decw"; + break; + case INCD_z_zs: + mnemonic = "incd"; + break; + case INCH_z_zs: + mnemonic = "inch"; + break; + case INCW_z_zs: + mnemonic = "incw"; + break; + default: + form = "(SVEIncDecVectorByElementCount)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEInsertGeneralRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEInsertGeneralRegister)"; + + switch (instr->Mask(SVEInsertGeneralRegisterMask)) { + case INSR_z_r: + mnemonic = "insr"; + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Zd.'t, 'Xn"; + } else { + form = "'Zd.'t, 'Wn"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEInsertSIMDFPScalarRegister( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEInsertSIMDFPScalarRegister)"; + + switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) { + case INSR_z_v: + mnemonic = "insr"; + form = "'Zd.'t, 'Vnv"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntAddSubtractImm_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = (instr->ExtractBit(13) == 0) + ? 
"'Zd.'t, 'Zd.'t, #'u1205" + : "'Zd.'t, 'Zd.'t, #'u1205, lsl #8"; + + switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) { + case ADD_z_zi: + mnemonic = "add"; + break; + case SQADD_z_zi: + mnemonic = "sqadd"; + break; + case SQSUB_z_zi: + mnemonic = "sqsub"; + break; + case SUBR_z_zi: + mnemonic = "subr"; + break; + case SUB_z_zi: + mnemonic = "sub"; + break; + case UQADD_z_zi: + mnemonic = "uqadd"; + break; + case UQSUB_z_zi: + mnemonic = "uqsub"; + break; + default: + form = "(SVEIntAddSubtractImm_Unpredicated)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntAddSubtractVectors_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) { + case ADD_z_p_zz: + mnemonic = "add"; + break; + case SUBR_z_p_zz: + mnemonic = "subr"; + break; + case SUB_z_p_zz: + mnemonic = "sub"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntCompareScalarCountAndLimit( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = + (instr->ExtractBit(12) == 0) ? "'Pd.'t, 'Wn, 'Wm" : "'Pd.'t, 'Xn, 'Xm"; + + switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) { + case WHILELE_p_p_rr: + mnemonic = "whilele"; + break; + case WHILELO_p_p_rr: + mnemonic = "whilelo"; + break; + case WHILELS_p_p_rr: + mnemonic = "whilels"; + break; + case WHILELT_p_p_rr: + mnemonic = "whilelt"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntConvertToFP(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIntConvertToFP)"; + + switch (instr->Mask(SVEIntConvertToFPMask)) { + case SCVTF_z_p_z_h2fp16: + mnemonic = "scvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.h"; + break; + case SCVTF_z_p_z_w2d: + mnemonic = "scvtf"; + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + case SCVTF_z_p_z_w2fp16: + mnemonic = "scvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.s"; + break; + case SCVTF_z_p_z_w2s: + mnemonic = "scvtf"; + form = "'Zd.s, 'Pgl/m, 'Zn.s"; + break; + case SCVTF_z_p_z_x2d: + mnemonic = "scvtf"; + form = "'Zd.d, 'Pgl/m, 'Zn.d"; + break; + case SCVTF_z_p_z_x2fp16: + mnemonic = "scvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.d"; + break; + case SCVTF_z_p_z_x2s: + mnemonic = "scvtf"; + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + case UCVTF_z_p_z_h2fp16: + mnemonic = "ucvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.h"; + break; + case UCVTF_z_p_z_w2d: + mnemonic = "ucvtf"; + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + case UCVTF_z_p_z_w2fp16: + mnemonic = "ucvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.s"; + break; + case UCVTF_z_p_z_w2s: + mnemonic = "ucvtf"; + form = "'Zd.s, 'Pgl/m, 'Zn.s"; + break; + case UCVTF_z_p_z_x2d: + mnemonic = "ucvtf"; + form = "'Zd.d, 'Pgl/m, 'Zn.d"; + break; + case UCVTF_z_p_z_x2fp16: + mnemonic = "ucvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.d"; + break; + case UCVTF_z_p_z_x2s: + mnemonic = "ucvtf"; + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntDivideVectors_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) { + case SDIVR_z_p_zz: + mnemonic = "sdivr"; + break; + case SDIV_z_p_zz: + mnemonic = "sdiv"; + break; + case UDIVR_z_p_zz: + mnemonic = "udivr"; + break; + case 
UDIV_z_p_zz: + mnemonic = "udiv"; + break; + default: + break; + } + + switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) { + case SDIVR_z_p_zz: + case SDIV_z_p_zz: + case UDIVR_z_p_zz: + case UDIV_z_p_zz: + switch (instr->GetSVESize()) { + case kBRegSizeInBytesLog2: + case kHRegSizeInBytesLog2: + mnemonic = "unimplemented"; + form = "(SVEIntBinaryArithmeticPredicated)"; + break; + case kSRegSizeInBytesLog2: + case kDRegSizeInBytesLog2: + // The default form works for these instructions. + break; + default: + // GetSVESize() should never return other values. + VIXL_UNREACHABLE(); + break; + } + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMinMaxDifference_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) { + case SABD_z_p_zz: + mnemonic = "sabd"; + break; + case SMAX_z_p_zz: + mnemonic = "smax"; + break; + case SMIN_z_p_zz: + mnemonic = "smin"; + break; + case UABD_z_p_zz: + mnemonic = "uabd"; + break; + case UMAX_z_p_zz: + mnemonic = "umax"; + break; + case UMIN_z_p_zz: + mnemonic = "umin"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMinMaxImm_Unpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zd.'t, #'u1205"; + + switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) { + case SMAX_z_zi: + mnemonic = "smax"; + form = "'Zd.'t, 'Zd.'t, #'s1205"; + break; + case SMIN_z_zi: + mnemonic = "smin"; + form = "'Zd.'t, 'Zd.'t, #'s1205"; + break; + case UMAX_z_zi: + mnemonic = "umax"; + break; + case UMIN_z_zi: + mnemonic = "umin"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMulImm_Unpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIntMulImm_Unpredicated)"; + + switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) { + case MUL_z_zi: + mnemonic = "mul"; + form = "'Zd.'t, 'Zd.'t, #'s1205"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMulVectors_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) { + case MUL_z_p_zz: + mnemonic = "mul"; + break; + case SMULH_z_p_zz: + mnemonic = "smulh"; + break; + case UMULH_z_p_zz: + mnemonic = "umulh"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVELoadAndBroadcastElement(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadAndBroadcastElement)"; + const char *suffix_b = ", #'u2116]"; + const char *suffix_h = ", #'u2116*2]"; + const char *suffix_w = ", #'u2116*4]"; + const char *suffix_d = ", #'u2116*8]"; + const char *suffix = NULL; + + switch (instr->Mask(SVELoadAndBroadcastElementMask)) { + case LD1RB_z_p_bi_u16: + mnemonic = "ld1rb"; + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RB_z_p_bi_u32: + mnemonic = "ld1rb"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RB_z_p_bi_u64: + mnemonic = "ld1rb"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RB_z_p_bi_u8: + mnemonic = "ld1rb"; + form = "{'Zt.b}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RD_z_p_bi_u64: + 
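+ // The six-bit immediate (bits 21:16) is scaled by the access size, hence the
+ // *2/*4/*8 factors in the H/W/D suffixes; an encoded offset of 63, for
+ // example, disassembles as "ld1rd {z0.d}, p0/z, [x1, #504]".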
mnemonic = "ld1rd"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_d; + break; + case LD1RH_z_p_bi_u16: + mnemonic = "ld1rh"; + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case LD1RH_z_p_bi_u32: + mnemonic = "ld1rh"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case LD1RH_z_p_bi_u64: + mnemonic = "ld1rh"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case LD1RSB_z_p_bi_s16: + mnemonic = "ld1rsb"; + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RSB_z_p_bi_s32: + mnemonic = "ld1rsb"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RSB_z_p_bi_s64: + mnemonic = "ld1rsb"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RSH_z_p_bi_s32: + mnemonic = "ld1rsh"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case LD1RSH_z_p_bi_s64: + mnemonic = "ld1rsh"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case LD1RSW_z_p_bi_s64: + mnemonic = "ld1rsw"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_w; + break; + case LD1RW_z_p_bi_u32: + mnemonic = "ld1rw"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_w; + break; + case LD1RW_z_p_bi_u64: + mnemonic = "ld1rw"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_w; + break; + default: + break; + } + + // Hide curly brackets if immediate is zero. + if (instr->ExtractBits(21, 16) == 0) { + suffix = "]"; + } + + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusImm)"; + + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? 
"]" : ", #'s1916*16]"; + + switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) { + case LD1RQB_z_p_bi_u8: + mnemonic = "ld1rqb"; + form = "{'Zt.b}, 'Pgl/z, ['Xns"; + break; + case LD1RQD_z_p_bi_u64: + mnemonic = "ld1rqd"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + break; + case LD1RQH_z_p_bi_u16: + mnemonic = "ld1rqh"; + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + break; + case LD1RQW_z_p_bi_u32: + mnemonic = "ld1rqw"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + break; + default: + suffix = NULL; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusScalar)"; + + switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) { + case LD1RQB_z_p_br_contiguous: + mnemonic = "ld1rqb"; + form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]"; + break; + case LD1RQD_z_p_br_contiguous: + mnemonic = "ld1rqd"; + form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]"; + break; + case LD1RQH_z_p_br_contiguous: + mnemonic = "ld1rqh"; + form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]"; + break; + case LD1RQW_z_p_br_contiguous: + mnemonic = "ld1rqw"; + form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadMultipleStructures_ScalarPlusImm)"; + + const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]"; + const char *form_3 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]"; + const char *form_4 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " + "'Pgl/z, ['Xns'ISveSvl]"; + + switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) { + case LD2B_z_p_bi_contiguous: + mnemonic = "ld2b"; + form = form_2; + break; + case LD2D_z_p_bi_contiguous: + mnemonic = "ld2d"; + form = form_2; + break; + case LD2H_z_p_bi_contiguous: + mnemonic = "ld2h"; + form = form_2; + break; + case LD2W_z_p_bi_contiguous: + mnemonic = "ld2w"; + form = form_2; + break; + case LD3B_z_p_bi_contiguous: + mnemonic = "ld3b"; + form = form_3; + break; + case LD3D_z_p_bi_contiguous: + mnemonic = "ld3d"; + form = form_3; + break; + case LD3H_z_p_bi_contiguous: + mnemonic = "ld3h"; + form = form_3; + break; + case LD3W_z_p_bi_contiguous: + mnemonic = "ld3w"; + form = form_3; + break; + case LD4B_z_p_bi_contiguous: + mnemonic = "ld4b"; + form = form_4; + break; + case LD4D_z_p_bi_contiguous: + mnemonic = "ld4d"; + form = form_4; + break; + case LD4H_z_p_bi_contiguous: + mnemonic = "ld4h"; + form = form_4; + break; + case LD4W_z_p_bi_contiguous: + mnemonic = "ld4w"; + form = form_4; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadMultipleStructures_ScalarPlusScalar)"; + + const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]"; + const char *form_3 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]"; + const char *form_4 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " + "'Pgl/z, ['Xns, 'Xm'NSveS]"; + + switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) { + case LD2B_z_p_br_contiguous: + mnemonic = "ld2b"; + form = form_2; + break; + case 
LD2D_z_p_br_contiguous: + mnemonic = "ld2d"; + form = form_2; + break; + case LD2H_z_p_br_contiguous: + mnemonic = "ld2h"; + form = form_2; + break; + case LD2W_z_p_br_contiguous: + mnemonic = "ld2w"; + form = form_2; + break; + case LD3B_z_p_br_contiguous: + mnemonic = "ld3b"; + form = form_3; + break; + case LD3D_z_p_br_contiguous: + mnemonic = "ld3d"; + form = form_3; + break; + case LD3H_z_p_br_contiguous: + mnemonic = "ld3h"; + form = form_3; + break; + case LD3W_z_p_br_contiguous: + mnemonic = "ld3w"; + form = form_3; + break; + case LD4B_z_p_br_contiguous: + mnemonic = "ld4b"; + form = form_4; + break; + case LD4D_z_p_br_contiguous: + mnemonic = "ld4d"; + form = form_4; + break; + case LD4H_z_p_br_contiguous: + mnemonic = "ld4h"; + form = form_4; + break; + case LD4W_z_p_br_contiguous: + mnemonic = "ld4w"; + form = form_4; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVELoadPredicateRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadPredicateRegister)"; + + switch (instr->Mask(SVELoadPredicateRegisterMask)) { + case LDR_p_bi: + mnemonic = "ldr"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Pd, ['Xns]"; + } else { + form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVELoadVectorRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadVectorRegister)"; + + switch (instr->Mask(SVELoadVectorRegisterMask)) { + case LDR_z_bi: + mnemonic = "ldr"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Zd, ['Xns]"; + } else { + form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPartitionBreakCondition(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.b, p'u1310/'?04:mz, 'Pn.b"; + + switch (instr->Mask(SVEPartitionBreakConditionMask)) { + case BRKAS_p_p_p_z: + mnemonic = "brkas"; + break; + case BRKA_p_p_p: + mnemonic = "brka"; + break; + case BRKBS_p_p_p_z: + mnemonic = "brkbs"; + break; + case BRKB_p_p_p: + mnemonic = "brkb"; + break; + default: + form = "(SVEPartitionBreakCondition)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPermutePredicateElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pn.'t, 'Pm.'t"; + + switch (instr->Mask(SVEPermutePredicateElementsMask)) { + case TRN1_p_pp: + mnemonic = "trn1"; + break; + case TRN2_p_pp: + mnemonic = "trn2"; + break; + case UZP1_p_pp: + mnemonic = "uzp1"; + break; + case UZP2_p_pp: + mnemonic = "uzp2"; + break; + case ZIP1_p_pp: + mnemonic = "zip1"; + break; + case ZIP2_p_pp: + mnemonic = "zip2"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateFirstActive(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateFirstActive)"; + + switch (instr->Mask(SVEPredicateFirstActiveMask)) { + case PFIRST_p_p_p: + mnemonic = "pfirst"; + form = "'Pd.b, 'Pn, 'Pd.b"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateReadFromFFR_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateReadFromFFR_Unpredicated)"; + + switch 
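+ // In the ldr forms above, the 0x003f1c00 test checks the split nine-bit
+ // offset (imm9h in bits 21:16, imm9l in bits 12:10) and omits the
+ // ", #imm, mul vl" part when it is zero; 'Zd and 'Zt both name bits 4:0.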
(instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) { + case RDFFR_p_f: + mnemonic = "rdffr"; + form = "'Pd.b"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateTest(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateTest)"; + + switch (instr->Mask(SVEPredicateTestMask)) { + case PTEST_p_p: + mnemonic = "ptest"; + form = "p'u1310, 'Pn.b"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateZero(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateZero)"; + + switch (instr->Mask(SVEPredicateZeroMask)) { + case PFALSE_p: + mnemonic = "pfalse"; + form = "'Pd.b"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPropagateBreakToNextPartition( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pd.b"; + + switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) { + case BRKNS_p_p_pp: + mnemonic = "brkns"; + break; + case BRKN_p_p_pp: + mnemonic = "brkn"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEReversePredicateElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEReversePredicateElements)"; + + switch (instr->Mask(SVEReversePredicateElementsMask)) { + case REV_p_p: + mnemonic = "rev"; + form = "'Pd.'t, 'Pn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEReverseVectorElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEReverseVectorElements)"; + + switch (instr->Mask(SVEReverseVectorElementsMask)) { + case REV_z_z: + mnemonic = "rev"; + form = "'Zd.'t, 'Zn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEReverseWithinElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; + + unsigned size = instr->GetSVESize(); + switch (instr->Mask(SVEReverseWithinElementsMask)) { + case RBIT_z_p_z: + mnemonic = "rbit"; + break; + case REVB_z_z: + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + mnemonic = "revb"; + } else { + form = "(SVEReverseWithinElements)"; + } + break; + case REVH_z_z: + if ((size == kSRegSizeInBytesLog2) || (size == kDRegSizeInBytesLog2)) { + mnemonic = "revh"; + } else { + form = "(SVEReverseWithinElements)"; + } + break; + case REVW_z_z: + if (size == kDRegSizeInBytesLog2) { + mnemonic = "revw"; + } else { + form = "(SVEReverseWithinElements)"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVESaturatingIncDecRegisterByElementCount( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = IncDecFormHelper(instr, + "'R20d, 'Ipc, mul #'u1916+1", + "'R20d, 'Ipc", + "'R20d"); + const char *form_sx = IncDecFormHelper(instr, + "'Xd, 'Wd, 'Ipc, mul #'u1916+1", + "'Xd, 'Wd, 'Ipc", + "'Xd, 'Wd"); + + switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) { + case SQDECB_r_rs_sx: + mnemonic = "sqdecb"; + form = form_sx; + break; + case SQDECD_r_rs_sx: + mnemonic = "sqdecd"; + form = form_sx; + break; + case SQDECH_r_rs_sx: + mnemonic = "sqdech"; + form = 
form_sx; + break; + case SQDECW_r_rs_sx: + mnemonic = "sqdecw"; + form = form_sx; + break; + case SQINCB_r_rs_sx: + mnemonic = "sqincb"; + form = form_sx; + break; + case SQINCD_r_rs_sx: + mnemonic = "sqincd"; + form = form_sx; + break; + case SQINCH_r_rs_sx: + mnemonic = "sqinch"; + form = form_sx; + break; + case SQINCW_r_rs_sx: + mnemonic = "sqincw"; + form = form_sx; + break; + case SQDECB_r_rs_x: + mnemonic = "sqdecb"; + break; + case SQDECD_r_rs_x: + mnemonic = "sqdecd"; + break; + case SQDECH_r_rs_x: + mnemonic = "sqdech"; + break; + case SQDECW_r_rs_x: + mnemonic = "sqdecw"; + break; + case SQINCB_r_rs_x: + mnemonic = "sqincb"; + break; + case SQINCD_r_rs_x: + mnemonic = "sqincd"; + break; + case SQINCH_r_rs_x: + mnemonic = "sqinch"; + break; + case SQINCW_r_rs_x: + mnemonic = "sqincw"; + break; + case UQDECB_r_rs_uw: + case UQDECB_r_rs_x: + mnemonic = "uqdecb"; + break; + case UQDECD_r_rs_uw: + case UQDECD_r_rs_x: + mnemonic = "uqdecd"; + break; + case UQDECH_r_rs_uw: + case UQDECH_r_rs_x: + mnemonic = "uqdech"; + break; + case UQDECW_r_rs_uw: + case UQDECW_r_rs_x: + mnemonic = "uqdecw"; + break; + case UQINCB_r_rs_uw: + case UQINCB_r_rs_x: + mnemonic = "uqincb"; + break; + case UQINCD_r_rs_uw: + case UQINCD_r_rs_x: + mnemonic = "uqincd"; + break; + case UQINCH_r_rs_uw: + case UQINCH_r_rs_x: + mnemonic = "uqinch"; + break; + case UQINCW_r_rs_uw: + case UQINCW_r_rs_x: + mnemonic = "uqincw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVESaturatingIncDecVectorByElementCount( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = IncDecFormHelper(instr, + "'Zd.'t, 'Ipc, mul #'u1916+1", + "'Zd.'t, 'Ipc", + "'Zd.'t"); + + switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) { + case SQDECD_z_zs: + mnemonic = "sqdecd"; + break; + case SQDECH_z_zs: + mnemonic = "sqdech"; + break; + case SQDECW_z_zs: + mnemonic = "sqdecw"; + break; + case SQINCD_z_zs: + mnemonic = "sqincd"; + break; + case SQINCH_z_zs: + mnemonic = "sqinch"; + break; + case SQINCW_z_zs: + mnemonic = "sqincw"; + break; + case UQDECD_z_zs: + mnemonic = "uqdecd"; + break; + case UQDECH_z_zs: + mnemonic = "uqdech"; + break; + case UQDECW_z_zs: + mnemonic = "uqdecw"; + break; + case UQINCD_z_zs: + mnemonic = "uqincd"; + break; + case UQINCH_z_zs: + mnemonic = "uqinch"; + break; + case UQINCW_z_zs: + mnemonic = "uqincw"; + break; + default: + form = "(SVEElementCount)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEStoreMultipleStructures_ScalarPlusImm)"; + + const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns'ISveSvl]"; + const char *form_3 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, ['Xns'ISveSvl]"; + const char *form_4 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " + "'Pgl, ['Xns'ISveSvl]"; + + switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) { + case ST2B_z_p_bi_contiguous: + mnemonic = "st2b"; + form = form_2; + break; + case ST2H_z_p_bi_contiguous: + mnemonic = "st2h"; + form = form_2; + break; + case ST2W_z_p_bi_contiguous: + mnemonic = "st2w"; + form = form_2; + break; + case ST2D_z_p_bi_contiguous: + mnemonic = "st2d"; + form = form_2; + break; + case ST3B_z_p_bi_contiguous: + mnemonic = "st3b"; + form = form_3; + break; + case ST3H_z_p_bi_contiguous: + mnemonic = "st3h"; + form = form_3; + break; + case 
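+ // 'ISveSvl prints the signed four-bit offset as a multiple of the vector
+ // length, e.g. "st2b {z0.b, z1.b}, p0, [x1, #4, mul vl]".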
ST3W_z_p_bi_contiguous: + mnemonic = "st3w"; + form = form_3; + break; + case ST3D_z_p_bi_contiguous: + mnemonic = "st3d"; + form = form_3; + break; + case ST4B_z_p_bi_contiguous: + mnemonic = "st4b"; + form = form_4; + break; + case ST4H_z_p_bi_contiguous: + mnemonic = "st4h"; + form = form_4; + break; + case ST4W_z_p_bi_contiguous: + mnemonic = "st4w"; + form = form_4; + break; + case ST4D_z_p_bi_contiguous: + mnemonic = "st4d"; + form = form_4; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEStoreMultipleStructures_ScalarPlusScalar)"; + + const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns, 'Xm'NSveS]"; + const char *form_3 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, ['Xns, 'Xm'NSveS]"; + const char *form_4 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " + "'Pgl, ['Xns, 'Xm'NSveS]"; + + switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) { + case ST2B_z_p_br_contiguous: + mnemonic = "st2b"; + form = form_2; + break; + case ST2D_z_p_br_contiguous: + mnemonic = "st2d"; + form = form_2; + break; + case ST2H_z_p_br_contiguous: + mnemonic = "st2h"; + form = form_2; + break; + case ST2W_z_p_br_contiguous: + mnemonic = "st2w"; + form = form_2; + break; + case ST3B_z_p_br_contiguous: + mnemonic = "st3b"; + form = form_3; + break; + case ST3D_z_p_br_contiguous: + mnemonic = "st3d"; + form = form_3; + break; + case ST3H_z_p_br_contiguous: + mnemonic = "st3h"; + form = form_3; + break; + case ST3W_z_p_br_contiguous: + mnemonic = "st3w"; + form = form_3; + break; + case ST4B_z_p_br_contiguous: + mnemonic = "st4b"; + form = form_4; + break; + case ST4D_z_p_br_contiguous: + mnemonic = "st4d"; + form = form_4; + break; + case ST4H_z_p_br_contiguous: + mnemonic = "st4h"; + form = form_4; + break; + case ST4W_z_p_br_contiguous: + mnemonic = "st4w"; + form = form_4; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStorePredicateRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEStorePredicateRegister)"; + + switch (instr->Mask(SVEStorePredicateRegisterMask)) { + case STR_p_bi: + mnemonic = "str"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Pd, ['Xns]"; + } else { + form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStoreVectorRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEStoreVectorRegister)"; + + switch (instr->Mask(SVEStoreVectorRegisterMask)) { + case STR_z_bi: + mnemonic = "str"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Zd, ['Xns]"; + } else { + form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVETableLookup(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVETableLookup)"; + + switch (instr->Mask(SVETableLookupMask)) { + case TBL_z_zz_1: + mnemonic = "tbl"; + form = "'Zd.'t, {'Zn.'t}, 'Zm.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEUnpackPredicateElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.h, 'Pn.b"; + + switch 
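+ // punpkhi/punpklo always widen a predicate from B to H elements, which is
+ // why the form above is fixed as "'Pd.h, 'Pn.b".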
(instr->Mask(SVEUnpackPredicateElementsMask)) { + case PUNPKHI_p_p: + mnemonic = "punpkhi"; + break; + case PUNPKLO_p_p: + mnemonic = "punpklo"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEUnpackVectorElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zn.'th"; + + if (instr->GetSVESize() == 0) { + // The lowest lane size of the destination vector is H-sized lane. + Format(instr, "unallocated", "(SVEUnpackVectorElements)"); + return; + } + + switch (instr->Mask(SVEUnpackVectorElementsMask)) { + case SUNPKHI_z_z: + mnemonic = "sunpkhi"; + break; + case SUNPKLO_z_z: + mnemonic = "sunpklo"; + break; + case UUNPKHI_z_z: + mnemonic = "uunpkhi"; + break; + case UUNPKLO_z_z: + mnemonic = "uunpklo"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEVectorSplice_Destructive(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEVectorSplice_Destructive)"; + + switch (instr->Mask(SVEVectorSplice_DestructiveMask)) { + case SPLICE_z_p_zz_des: + mnemonic = "splice"; + form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEAddressGeneration(const Instruction *instr) { + const char *mnemonic = "adr"; + const char *form = "'Zd.d, ['Zn.d, 'Zm.d"; + const char *suffix = NULL; + + bool msz_is_zero = (instr->ExtractBits(11, 10) == 0); + + switch (instr->Mask(SVEAddressGenerationMask)) { + case ADR_z_az_d_s32_scaled: + suffix = msz_is_zero ? ", sxtw]" : ", sxtw #'u1110]"; + break; + case ADR_z_az_d_u32_scaled: + suffix = msz_is_zero ? ", uxtw]" : ", uxtw #'u1110]"; + break; + case ADR_z_az_s_same_scaled: + case ADR_z_az_d_same_scaled: + form = "'Zd.'t, ['Zn.'t, 'Zm.'t"; + suffix = msz_is_zero ? "]" : ", lsl #'u1110]"; + break; + default: + mnemonic = "unimplemented"; + form = "(SVEAddressGeneration)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEBitwiseLogicalUnpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.d, 'Zn.d, 'Zm.d"; + + switch (instr->Mask(SVEBitwiseLogicalUnpredicatedMask)) { + case AND_z_zz: + mnemonic = "and"; + break; + case BIC_z_zz: + mnemonic = "bic"; + break; + case EOR_z_zz: + mnemonic = "eor"; + break; + case ORR_z_zz: + mnemonic = "orr"; + if (instr->GetRn() == instr->GetRm()) { + mnemonic = "mov"; + form = "'Zd.d, 'Zn.d"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBitwiseShiftUnpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBitwiseShiftUnpredicated)"; + unsigned tsize = + (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(20, 19); + unsigned lane_size = instr->GetSVESize(); + + switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) { + case ASR_z_zi: + if (tsize != 0) { + // The tsz field must not be zero. + mnemonic = "asr"; + form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves"; + } + break; + case ASR_z_zw: + if (lane_size <= kSRegSizeInBytesLog2) { + mnemonic = "asr"; + form = "'Zd.'t, 'Zn.'t, 'Zm.d"; + } + break; + case LSL_z_zi: + if (tsize != 0) { + // The tsz field must not be zero. 
+ mnemonic = "lsl"; + form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSver"; + } + break; + case LSL_z_zw: + if (lane_size <= kSRegSizeInBytesLog2) { + mnemonic = "lsl"; + form = "'Zd.'t, 'Zn.'t, 'Zm.d"; + } + break; + case LSR_z_zi: + if (tsize != 0) { + // The tsz field must not be zero. + mnemonic = "lsr"; + form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves"; + } + break; + case LSR_z_zw: + if (lane_size <= kSRegSizeInBytesLog2) { + mnemonic = "lsr"; + form = "'Zd.'t, 'Zn.'t, 'Zm.d"; + } + break; + default: + break; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEElementCount(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = + IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd"); + + switch (instr->Mask(SVEElementCountMask)) { + case CNTB_r_s: + mnemonic = "cntb"; + break; + case CNTD_r_s: + mnemonic = "cntd"; + break; + case CNTH_r_s: + mnemonic = "cnth"; + break; + case CNTW_r_s: + mnemonic = "cntw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPAccumulatingReduction(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPAccumulatingReduction)"; + + switch (instr->Mask(SVEFPAccumulatingReductionMask)) { + case FADDA_v_p_z: + mnemonic = "fadda"; + form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPArithmeticUnpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + + switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) { + case FADD_z_zz: + mnemonic = "fadd"; + break; + case FMUL_z_zz: + mnemonic = "fmul"; + break; + case FRECPS_z_zz: + mnemonic = "frecps"; + break; + case FRSQRTS_z_zz: + mnemonic = "frsqrts"; + break; + case FSUB_z_zz: + mnemonic = "fsub"; + break; + case FTSMUL_z_zz: + mnemonic = "ftsmul"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPCompareVectors(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + + switch (instr->Mask(SVEFPCompareVectorsMask)) { + case FACGE_p_p_zz: + mnemonic = "facge"; + break; + case FACGT_p_p_zz: + mnemonic = "facgt"; + break; + case FCMEQ_p_p_zz: + mnemonic = "fcmeq"; + break; + case FCMGE_p_p_zz: + mnemonic = "fcmge"; + break; + case FCMGT_p_p_zz: + mnemonic = "fcmgt"; + break; + case FCMNE_p_p_zz: + mnemonic = "fcmne"; + break; + case FCMUO_p_p_zz: + mnemonic = "fcmuo"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPCompareWithZero(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #0.0"; + + switch (instr->Mask(SVEFPCompareWithZeroMask)) { + case FCMEQ_p_p_z0: + mnemonic = "fcmeq"; + break; + case FCMGE_p_p_z0: + mnemonic = "fcmge"; + break; + case FCMGT_p_p_z0: + mnemonic = "fcmgt"; + break; + case FCMLE_p_p_z0: + mnemonic = "fcmle"; + break; + case FCMLT_p_p_z0: + mnemonic = "fcmlt"; + break; + case FCMNE_p_p_z0: + mnemonic = "fcmne"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPComplexAddition(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPComplexAddition)"; + + switch (instr->Mask(SVEFPComplexAdditionMask)) { + case FCADD_z_p_zz: + mnemonic = "fcadd"; + if 
(instr->ExtractBit(16) == 0) { + form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #90"; + } else { + form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #270"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPComplexMulAdd(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPComplexMulAdd)"; + const char *suffix = NULL; + + const char *fcmla_constants[] = {"0", "90", "180", "270"}; + + switch (instr->Mask(SVEFPComplexMulAddMask)) { + case FCMLA_z_p_zzz: + mnemonic = "fcmla"; + form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t, #"; + suffix = fcmla_constants[instr->ExtractBits(14, 13)]; + break; + default: + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEFPComplexMulAddIndex(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPComplexMulAddIndex)"; + + const char *fcmla_constants[] = {"0", "90", "180", "270"}; + const char *suffix = fcmla_constants[instr->ExtractBits(11, 10)]; + + switch (instr->Mask(SVEFPComplexMulAddIndexMask)) { + case FCMLA_z_zzzi_h: + mnemonic = "fcmla"; + form = "'Zd.h, 'Zn.h, z'u1816.h['u2019], #"; + break; + case FCMLA_z_zzzi_s: + mnemonic = "fcmla"; + form = "'Zd.s, 'Zn.s, z'u1916.s['u2020], #"; + break; + default: + suffix = NULL; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEFPFastReduction(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'t'u0400, 'Pgl, 'Zn.'t"; + + switch (instr->Mask(SVEFPFastReductionMask)) { + case FADDV_v_p_z: + mnemonic = "faddv"; + break; + case FMAXNMV_v_p_z: + mnemonic = "fmaxnmv"; + break; + case FMAXV_v_p_z: + mnemonic = "fmaxv"; + break; + case FMINNMV_v_p_z: + mnemonic = "fminnmv"; + break; + case FMINV_v_p_z: + mnemonic = "fminv"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPMulIndex(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPMulIndex)"; + + switch (instr->Mask(SVEFPMulIndexMask)) { + case FMUL_z_zzi_d: + mnemonic = "fmul"; + form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + break; + case FMUL_z_zzi_h: + case FMUL_z_zzi_h_i3h: + mnemonic = "fmul"; + form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + break; + case FMUL_z_zzi_s: + mnemonic = "fmul"; + form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPMulAdd(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"; + + switch (instr->Mask(SVEFPMulAddMask)) { + case FMAD_z_p_zzz: + mnemonic = "fmad"; + break; + case FMLA_z_p_zzz: + mnemonic = "fmla"; + break; + case FMLS_z_p_zzz: + mnemonic = "fmls"; + break; + case FMSB_z_p_zzz: + mnemonic = "fmsb"; + break; + case FNMAD_z_p_zzz: + mnemonic = "fnmad"; + break; + case FNMLA_z_p_zzz: + mnemonic = "fnmla"; + break; + case FNMLS_z_p_zzz: + mnemonic = "fnmls"; + break; + case FNMSB_z_p_zzz: + mnemonic = "fnmsb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPMulAddIndex(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPMulAddIndex)"; + + switch (instr->Mask(SVEFPMulAddIndexMask)) { + case FMLA_z_zzzi_d: + mnemonic = "fmla"; + form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + break; + case FMLA_z_zzzi_s: + mnemonic = "fmla"; + form = "'Zd.s, 'Zn.s, 
z'u1816.s['u2019]"; + break; + case FMLS_z_zzzi_d: + mnemonic = "fmls"; + form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + break; + case FMLS_z_zzzi_s: + mnemonic = "fmls"; + form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; + break; + case FMLA_z_zzzi_h: + case FMLA_z_zzzi_h_i3h: + mnemonic = "fmla"; + form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + break; + case FMLS_z_zzzi_h: + case FMLS_z_zzzi_h_i3h: + mnemonic = "fmls"; + form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + break; + default: + break; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPUnaryOpUnpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) { + case FRECPE_z_z: + mnemonic = "frecpe"; + break; + case FRSQRTE_z_z: + mnemonic = "frsqrte"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIncDecByPredicateCount(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIncDecByPredicateCount)"; + + switch (instr->Mask(SVEIncDecByPredicateCountMask)) { + case DECP_r_p_r: + case DECP_z_p_z: + mnemonic = "decp"; + break; + case INCP_r_p_r: + case INCP_z_p_z: + mnemonic = "incp"; + break; + case SQDECP_r_p_r_sx: + case SQDECP_r_p_r_x: + case SQDECP_z_p_z: + mnemonic = "sqdecp"; + break; + case SQINCP_r_p_r_sx: + case SQINCP_r_p_r_x: + case SQINCP_z_p_z: + mnemonic = "sqincp"; + break; + case UQDECP_r_p_r_uw: + case UQDECP_r_p_r_x: + case UQDECP_z_p_z: + mnemonic = "uqdecp"; + break; + case UQINCP_r_p_r_uw: + case UQINCP_r_p_r_x: + case UQINCP_z_p_z: + mnemonic = "uqincp"; + break; + default: + break; + } + + switch (instr->Mask(SVEIncDecByPredicateCountMask)) { + // <Xdn>, <Pg>.<T> + case DECP_r_p_r: + case INCP_r_p_r: + form = "'Xd, 'Pn.'t"; + break; + // <Zdn>.<T>, <Pg> + case DECP_z_p_z: + case INCP_z_p_z: + case SQDECP_z_p_z: + case SQINCP_z_p_z: + case UQDECP_z_p_z: + case UQINCP_z_p_z: + form = "'Zd.'t, 'Pn"; + break; + // <Xdn>, <Pg>.<T>, <Wdn> + case SQDECP_r_p_r_sx: + case SQINCP_r_p_r_sx: + form = "'Xd, 'Pn.'t, 'Wd"; + break; + // <Xdn>, <Pg>.<T> + case SQDECP_r_p_r_x: + case SQINCP_r_p_r_x: + case UQDECP_r_p_r_x: + case UQINCP_r_p_r_x: + form = "'Xd, 'Pn.'t"; + break; + // <Wdn>, <Pg>.<T> + case UQDECP_r_p_r_uw: + case UQINCP_r_p_r_uw: + form = "'Wd, 'Pn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIndexGeneration(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIndexGeneration)"; + + bool w_inputs = + static_cast<unsigned>(instr->GetSVESize()) <= kWRegSizeInBytesLog2; + + switch (instr->Mask(SVEIndexGenerationMask)) { + case INDEX_z_ii: + mnemonic = "index"; + form = "'Zd.'t, #'s0905, #'s2016"; + break; + case INDEX_z_ir: + mnemonic = "index"; + form = w_inputs ? "'Zd.'t, #'s0905, 'Wm" : "'Zd.'t, #'s0905, 'Xm"; + break; + case INDEX_z_ri: + mnemonic = "index"; + form = w_inputs ? "'Zd.'t, 'Wn, #'s2016" : "'Zd.'t, 'Xn, #'s2016"; + break; + case INDEX_z_rr: + mnemonic = "index"; + form = w_inputs ? 
"'Zd.'t, 'Wn, 'Wm" : "'Zd.'t, 'Xn, 'Xm"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntArithmeticUnpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + + switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) { + case ADD_z_zz: + mnemonic = "add"; + break; + case SQADD_z_zz: + mnemonic = "sqadd"; + break; + case SQSUB_z_zz: + mnemonic = "sqsub"; + break; + case SUB_z_zz: + mnemonic = "sub"; + break; + case UQADD_z_zz: + mnemonic = "uqadd"; + break; + case UQSUB_z_zz: + mnemonic = "uqsub"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntCompareSignedImm(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'s2016"; + + switch (instr->Mask(SVEIntCompareSignedImmMask)) { + case CMPEQ_p_p_zi: + mnemonic = "cmpeq"; + break; + case CMPGE_p_p_zi: + mnemonic = "cmpge"; + break; + case CMPGT_p_p_zi: + mnemonic = "cmpgt"; + break; + case CMPLE_p_p_zi: + mnemonic = "cmple"; + break; + case CMPLT_p_p_zi: + mnemonic = "cmplt"; + break; + case CMPNE_p_p_zi: + mnemonic = "cmpne"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntCompareUnsignedImm(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'u2014"; + + switch (instr->Mask(SVEIntCompareUnsignedImmMask)) { + case CMPHI_p_p_zi: + mnemonic = "cmphi"; + break; + case CMPHS_p_p_zi: + mnemonic = "cmphs"; + break; + case CMPLO_p_p_zi: + mnemonic = "cmplo"; + break; + case CMPLS_p_p_zi: + mnemonic = "cmpls"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntCompareVectors(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.d"; + + switch (instr->Mask(SVEIntCompareVectorsMask)) { + case CMPEQ_p_p_zw: + mnemonic = "cmpeq"; + break; + case CMPEQ_p_p_zz: + mnemonic = "cmpeq"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + case CMPGE_p_p_zw: + mnemonic = "cmpge"; + break; + case CMPGE_p_p_zz: + mnemonic = "cmpge"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + case CMPGT_p_p_zw: + mnemonic = "cmpgt"; + break; + case CMPGT_p_p_zz: + mnemonic = "cmpgt"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + case CMPHI_p_p_zw: + mnemonic = "cmphi"; + break; + case CMPHI_p_p_zz: + mnemonic = "cmphi"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + case CMPHS_p_p_zw: + mnemonic = "cmphs"; + break; + case CMPHS_p_p_zz: + mnemonic = "cmphs"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + case CMPLE_p_p_zw: + mnemonic = "cmple"; + break; + case CMPLO_p_p_zw: + mnemonic = "cmplo"; + break; + case CMPLS_p_p_zw: + mnemonic = "cmpls"; + break; + case CMPLT_p_p_zw: + mnemonic = "cmplt"; + break; + case CMPNE_p_p_zw: + mnemonic = "cmpne"; + break; + case CMPNE_p_p_zz: + mnemonic = "cmpne"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMulAddPredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIntMulAddPredicated)"; + + switch (instr->Mask(SVEIntMulAddPredicatedMask)) { + case MAD_z_p_zzz: + mnemonic = "mad"; + form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t"; + break; + case MLA_z_p_zzz: + mnemonic = "mla"; + form = "'Zd.'t, 
'Pgl/m, 'Zn.'t, 'Zm.'t"; + break; + case MLS_z_p_zzz: + mnemonic = "mls"; + form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"; + break; + case MSB_z_p_zzz: + mnemonic = "msb"; + form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMulAddUnpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIntMulAddUnpredicated)"; + + if (static_cast<unsigned>(instr->GetSVESize()) >= kSRegSizeInBytesLog2) { + form = "'Zd.'t, 'Zn.'tq, 'Zm.'tq"; + switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) { + case SDOT_z_zzz: + mnemonic = "sdot"; + break; + case UDOT_z_zzz: + mnemonic = "udot"; + break; + default: + break; + } + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEMovprfx(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEMovprfx)"; + + if (instr->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z) { + mnemonic = "movprfx"; + form = "'Zd.'t, 'Pgl/'?16:mz, 'Zn.'t"; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntReduction(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Vdv, 'Pgl, 'Zn.'t"; + + if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) { + switch (instr->Mask(SVEIntReductionLogicalMask)) { + case ANDV_r_p_z: + mnemonic = "andv"; + break; + case EORV_r_p_z: + mnemonic = "eorv"; + break; + case ORV_r_p_z: + mnemonic = "orv"; + break; + default: + break; + } + } else { + switch (instr->Mask(SVEIntReductionMask)) { + case SADDV_r_p_z: + mnemonic = "saddv"; + form = "'Dd, 'Pgl, 'Zn.'t"; + break; + case SMAXV_r_p_z: + mnemonic = "smaxv"; + break; + case SMINV_r_p_z: + mnemonic = "sminv"; + break; + case UADDV_r_p_z: + mnemonic = "uaddv"; + form = "'Dd, 'Pgl, 'Zn.'t"; + break; + case UMAXV_r_p_z: + mnemonic = "umaxv"; + break; + case UMINV_r_p_z: + mnemonic = "uminv"; + break; + default: + break; + } + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntUnaryArithmeticPredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; + + switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) { + case ABS_z_p_z: + mnemonic = "abs"; + break; + case CLS_z_p_z: + mnemonic = "cls"; + break; + case CLZ_z_p_z: + mnemonic = "clz"; + break; + case CNOT_z_p_z: + mnemonic = "cnot"; + break; + case CNT_z_p_z: + mnemonic = "cnt"; + break; + case FABS_z_p_z: + mnemonic = "fabs"; + break; + case FNEG_z_p_z: + mnemonic = "fneg"; + break; + case NEG_z_p_z: + mnemonic = "neg"; + break; + case NOT_z_p_z: + mnemonic = "not"; + break; + case SXTB_z_p_z: + mnemonic = "sxtb"; + break; + case SXTH_z_p_z: + mnemonic = "sxth"; + break; + case SXTW_z_p_z: + mnemonic = "sxtw"; + break; + case UXTB_z_p_z: + mnemonic = "uxtb"; + break; + case UXTH_z_p_z: + mnemonic = "uxth"; + break; + case UXTW_z_p_z: + mnemonic = "uxtw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEMulIndex(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEMulIndex)"; + + switch (instr->Mask(SVEMulIndexMask)) { + case SDOT_z_zzzi_d: + mnemonic = "sdot"; + form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]"; + break; + case SDOT_z_zzzi_s: + mnemonic = "sdot"; + form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]"; + break; + case UDOT_z_zzzi_d: + mnemonic = "udot"; + form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]"; + break; + case 
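+ // The indexed dot products use quarter-width sources: a .s accumulator pairs
+ // with .b inputs and a .d accumulator with .h inputs, e.g.
+ // "sdot z0.s, z1.b, z2.b[3]".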
UDOT_z_zzzi_s: + mnemonic = "udot"; + form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPermuteVectorExtract(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPermuteVectorExtract)"; + + switch (instr->Mask(SVEPermuteVectorExtractMask)) { + case EXT_z_zi_des: + mnemonic = "ext"; + form = "'Zd.b, 'Zd.b, 'Zn.b, #'u2016:1210"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPermuteVectorInterleaving(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + + switch (instr->Mask(SVEPermuteVectorInterleavingMask)) { + case TRN1_z_zz: + mnemonic = "trn1"; + break; + case TRN2_z_zz: + mnemonic = "trn2"; + break; + case UZP1_z_zz: + mnemonic = "uzp1"; + break; + case UZP2_z_zz: + mnemonic = "uzp2"; + break; + case ZIP1_z_zz: + mnemonic = "zip1"; + break; + case ZIP2_z_zz: + mnemonic = "zip2"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateCount(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateCount)"; + + switch (instr->Mask(SVEPredicateCountMask)) { + case CNTP_r_p_p: + mnemonic = "cntp"; + form = "'Xd, p'u1310, 'Pn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateLogical(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b"; + + int pd = instr->GetPd(); + int pn = instr->GetPn(); + int pm = instr->GetPm(); + int pg = instr->ExtractBits(13, 10); + + switch (instr->Mask(SVEPredicateLogicalMask)) { + case ANDS_p_p_pp_z: + mnemonic = "ands"; + if (pn == pm) { + mnemonic = "movs"; + form = "'Pd.b, p'u1310/z, 'Pn.b"; + } + break; + case AND_p_p_pp_z: + mnemonic = "and"; + if (pn == pm) { + mnemonic = "mov"; + form = "'Pd.b, p'u1310/z, 'Pn.b"; + } + break; + case BICS_p_p_pp_z: + mnemonic = "bics"; + break; + case BIC_p_p_pp_z: + mnemonic = "bic"; + break; + case EORS_p_p_pp_z: + mnemonic = "eors"; + if (pm == pg) { + mnemonic = "nots"; + form = "'Pd.b, 'Pm/z, 'Pn.b"; + } + break; + case EOR_p_p_pp_z: + mnemonic = "eor"; + if (pm == pg) { + mnemonic = "not"; + form = "'Pd.b, 'Pm/z, 'Pn.b"; + } + break; + case NANDS_p_p_pp_z: + mnemonic = "nands"; + break; + case NAND_p_p_pp_z: + mnemonic = "nand"; + break; + case NORS_p_p_pp_z: + mnemonic = "nors"; + break; + case NOR_p_p_pp_z: + mnemonic = "nor"; + break; + case ORNS_p_p_pp_z: + mnemonic = "orns"; + break; + case ORN_p_p_pp_z: + mnemonic = "orn"; + break; + case ORRS_p_p_pp_z: + mnemonic = "orrs"; + if ((pn == pm) && (pn == pg)) { + mnemonic = "movs"; + form = "'Pd.b, 'Pn.b"; + } + break; + case ORR_p_p_pp_z: + mnemonic = "orr"; + if ((pn == pm) && (pn == pg)) { + mnemonic = "mov"; + form = "'Pd.b, 'Pn.b"; + } + break; + case SEL_p_p_pp: + if (pd == pm) { + mnemonic = "mov"; + form = "'Pd.b, p'u1310/m, 'Pn.b"; + } else { + mnemonic = "sel"; + form = "'Pd.b, p'u1310, 'Pn.b, 'Pm.b"; + } + break; + default: + form = "(SVEPredicateLogical)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateInitialize(const Instruction *instr) { + // This group only contains PTRUE{S}, and there are no unallocated encodings. 
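+ // PTRUE{S} defaults to the ALL pattern, so "ptrue p0.b" is preferred over
+ // "ptrue p0.b, all" (see the SVE_ALL check below).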
+ VIXL_STATIC_ASSERT( + SVEPredicateInitializeMask == + (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit)); + VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) || + (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s)); + + const char *mnemonic = instr->ExtractBit(16) ? "ptrues" : "ptrue"; + const char *form = "'Pd.'t, 'Ipc"; + // Omit the pattern if it is the default ('ALL'). + if (instr->ExtractBits(9, 5) == SVE_ALL) form = "'Pd.'t"; + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateNextActive(const Instruction *instr) { + // This group only contains PNEXT, and there are no unallocated encodings. + VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask); + VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p); + + Format(instr, "pnext", "'Pd.'t, 'Pn, 'Pd.'t"); +} + +void Disassembler::VisitSVEPredicateReadFromFFR_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateReadFromFFR_Predicated)"; + switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) { + case RDFFR_p_p_f: + case RDFFRS_p_p_f: + mnemonic = instr->ExtractBit(22) ? "rdffrs" : "rdffr"; + form = "'Pd.b, 'Pn/z"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPropagateBreak(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b"; + + switch (instr->Mask(SVEPropagateBreakMask)) { + case BRKPAS_p_p_pp: + mnemonic = "brkpas"; + break; + case BRKPA_p_p_pp: + mnemonic = "brkpa"; + break; + case BRKPBS_p_p_pp: + mnemonic = "brkpbs"; + break; + case BRKPB_p_p_pp: + mnemonic = "brkpb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStackFrameAdjustment(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Xds, 'Xms, #'s1005"; + + switch (instr->Mask(SVEStackFrameAdjustmentMask)) { + case ADDPL_r_ri: + mnemonic = "addpl"; + break; + case ADDVL_r_ri: + mnemonic = "addvl"; + break; + default: + form = "(SVEStackFrameAdjustment)"; + break; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStackFrameSize(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEStackFrameSize)"; + + switch (instr->Mask(SVEStackFrameSizeMask)) { + case RDVL_r_i: + mnemonic = "rdvl"; + form = "'Xd, #'s1005"; + break; + default: + break; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEVectorSelect(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEVectorSelect)"; + + switch (instr->Mask(SVEVectorSelectMask)) { + case SEL_z_p_zz: + if (instr->GetRd() == instr->GetRm()) { + mnemonic = "mov"; + form = "'Zd.'t, p'u1310/m, 'Zn.'t"; + } else { + mnemonic = "sel"; + form = "'Zd.'t, p'u1310, 'Zn.'t, 'Zm.'t"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousLoad_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? 
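+ // Contiguous loads scale the signed four-bit offset (bits 19:16) by the
+ // whole vector length, e.g. "ld1w {z0.s}, p0/z, [x1, #-8, mul vl]".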
"]" : ", #'s1916, mul vl]"; + + switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) { + case LD1B_z_p_bi_u16: + case LD1B_z_p_bi_u32: + case LD1B_z_p_bi_u64: + case LD1B_z_p_bi_u8: + mnemonic = "ld1b"; + break; + case LD1D_z_p_bi_u64: + mnemonic = "ld1d"; + break; + case LD1H_z_p_bi_u16: + case LD1H_z_p_bi_u32: + case LD1H_z_p_bi_u64: + mnemonic = "ld1h"; + break; + case LD1SB_z_p_bi_s16: + case LD1SB_z_p_bi_s32: + case LD1SB_z_p_bi_s64: + mnemonic = "ld1sb"; + break; + case LD1SH_z_p_bi_s32: + case LD1SH_z_p_bi_s64: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_bi_s64: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_bi_u32: + case LD1W_z_p_bi_u64: + mnemonic = "ld1w"; + break; + default: + form = "(SVEContiguousLoad_ScalarPlusImm)"; + suffix = NULL; + break; + } + + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEContiguousLoad_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns, 'Xm"; + const char *suffix = NULL; + + switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) { + case LD1B_z_p_br_u16: + case LD1B_z_p_br_u32: + case LD1B_z_p_br_u64: + case LD1B_z_p_br_u8: + mnemonic = "ld1b"; + suffix = "]"; + break; + case LD1D_z_p_br_u64: + mnemonic = "ld1d"; + suffix = ", lsl #'u2423]"; + break; + case LD1H_z_p_br_u16: + case LD1H_z_p_br_u32: + case LD1H_z_p_br_u64: + mnemonic = "ld1h"; + suffix = ", lsl #'u2423]"; + break; + case LD1SB_z_p_br_s16: + case LD1SB_z_p_br_s32: + case LD1SB_z_p_br_s64: + mnemonic = "ld1sb"; + suffix = "]"; + break; + case LD1SH_z_p_br_s32: + case LD1SH_z_p_br_s64: + mnemonic = "ld1sh"; + suffix = ", lsl #1]"; + break; + case LD1SW_z_p_br_s64: + mnemonic = "ld1sw"; + suffix = ", lsl #2]"; + break; + case LD1W_z_p_br_u32: + case LD1W_z_p_br_u64: + mnemonic = "ld1w"; + suffix = ", lsl #'u2423]"; + break; + default: + form = "(SVEContiguousLoad_ScalarPlusScalar)"; + suffix = NULL; + break; + } + + Format(instr, mnemonic, form, suffix); +} void Disassembler::VisitReserved(const Instruction *instr) { // UDF is the only instruction in this group, and the Decoder is precise. @@ -5059,14 +9633,18 @@ int64_t Disassembler::CodeRelativeAddress(const void *addr) { void Disassembler::Format(const Instruction *instr, const char *mnemonic, - const char *format) { + const char *format0, + const char *format1) { VIXL_ASSERT(mnemonic != NULL); ResetOutput(); Substitute(instr, mnemonic); - if (format != NULL) { + if (format0 != NULL) { VIXL_ASSERT(buffer_pos_ < buffer_size_); buffer_[buffer_pos_++] = ' '; - Substitute(instr, format); + Substitute(instr, format0); + if (format1 != NULL) { + Substitute(instr, format1); + } } VIXL_ASSERT(buffer_pos_ < buffer_size_); buffer_[buffer_pos_] = 0; @@ -5091,10 +9669,11 @@ void Disassembler::Substitute(const Instruction *instr, const char *string) { int Disassembler::SubstituteField(const Instruction *instr, const char *format) { switch (format[0]) { - // NB. The remaining substitution prefix characters are: GJKUZ. - case 'R': // Register. X or W, selected by sf bit. + // NB. The remaining substitution prefix upper-case characters are: JU. + case 'R': // Register. X or W, selected by sf (or alternative) bit. case 'F': // FP register. S or D, selected by type field. case 'V': // Vector register, V, vector format. + case 'Z': // Scalable vector register. 
case 'W': case 'X': case 'B': @@ -5103,14 +9682,14 @@ int Disassembler::SubstituteField(const Instruction *instr, case 'D': case 'Q': return SubstituteRegisterField(instr, format); + case 'P': + return SubstitutePredicateRegisterField(instr, format); case 'I': return SubstituteImmediateField(instr, format); case 'L': return SubstituteLiteralField(instr, format); case 'N': return SubstituteShiftField(instr, format); - case 'P': - return SubstitutePrefetchField(instr, format); case 'C': return SubstituteConditionField(instr, format); case 'E': @@ -5127,6 +9706,15 @@ int Disassembler::SubstituteField(const Instruction *instr, return SubstituteCrField(instr, format); case 'G': return SubstituteSysOpField(instr, format); + case 'p': + return SubstitutePrefetchField(instr, format); + case 'u': + case 's': + return SubstituteIntField(instr, format); + case 't': + return SubstituteSVESize(instr, format); + case '?': + return SubstituteTernary(instr, format); default: { VIXL_UNREACHABLE(); return 1; @@ -5134,55 +9722,20 @@ int Disassembler::SubstituteField(const Instruction *instr, } } +std::pair<unsigned, unsigned> Disassembler::GetRegNumForField( + const Instruction *instr, char reg_prefix, const char *field) { + unsigned reg_num = UINT_MAX; + unsigned field_len = 1; -int Disassembler::SubstituteRegisterField(const Instruction *instr, - const char *format) { - char reg_prefix = format[0]; - unsigned reg_num = 0; - unsigned field_len = 2; - - switch (format[1]) { + switch (field[0]) { case 'd': reg_num = instr->GetRd(); - if (format[2] == 'q') { - reg_prefix = instr->GetNEONQ() ? 'X' : 'W'; - field_len = 3; - } break; case 'n': reg_num = instr->GetRn(); break; case 'm': reg_num = instr->GetRm(); - switch (format[2]) { - // Handle registers tagged with b (bytes), z (instruction), or - // r (registers), used for address updates in - // NEON load/store instructions. - case 'r': - case 'b': - case 'z': { - field_len = 3; - char *eimm; - int imm = static_cast<int>(strtol(&format[3], &eimm, 10)); - field_len += eimm - &format[3]; - if (reg_num == 31) { - switch (format[2]) { - case 'z': - imm *= (1 << instr->GetNEONLSSize()); - break; - case 'r': - imm *= (instr->GetNEONQ() == 0) ? kDRegSizeInBytes - : kQRegSizeInBytes; - break; - case 'b': - break; - } - AppendToOutput("#%d", imm); - return field_len; - } - break; - } - } break; case 'e': // This is register Rm, but using a 4-bit specifier. Used in NEON @@ -5197,72 +9750,121 @@ int Disassembler::SubstituteRegisterField(const Instruction *instr, break; case 't': reg_num = instr->GetRt(); - if (format[0] == 'V') { - if ((format[2] >= '2') && (format[2] <= '4')) { - // Handle consecutive vector register specifiers Vt2, Vt3 and Vt4. - reg_num = (reg_num + format[2] - '1') % 32; - field_len = 3; - } - } else { - if (format[2] == '2') { - // Handle register specifier Rt2. 
-          reg_num = instr->GetRt2();
-          field_len = 3;
-        }
-      }
       break;
-    case '(': {
-      switch (format[2]) {
-        case 's':
-          reg_num = instr->GetRs();
-          break;
-        case 't':
-          reg_num = instr->GetRt();
-          break;
-        default:
-          VIXL_UNREACHABLE();
-      }
+    default:
+      VIXL_UNREACHABLE();
+  }
-      VIXL_ASSERT(format[3] == '+');
-      int i = 4;
-      int addition = 0;
-      while (format[i] != ')') {
-        VIXL_ASSERT((format[i] >= '0') && (format[i] <= '9'));
-        addition *= 10;
-        addition += format[i] - '0';
-        ++i;
+  switch (field[1]) {
+    case '2':
+    case '3':
+    case '4':
+      if ((reg_prefix == 'V') || (reg_prefix == 'Z')) {  // Vt2/3/4, Zt2/3/4
+        VIXL_ASSERT(field[0] == 't');
+        reg_num = (reg_num + field[1] - '1') % 32;
+        field_len++;
+      } else {
+        VIXL_ASSERT((field[0] == 't') && (field[1] == '2'));
+        reg_num = instr->GetRt2();
+        field_len++;
       }
-      reg_num += addition;
-      field_len = i + 1;
       break;
+    case '+':  // Rt+, Rs+ (ie. Rt + 1, Rs + 1)
+      VIXL_ASSERT((reg_prefix == 'W') || (reg_prefix == 'X'));
+      VIXL_ASSERT((field[0] == 's') || (field[0] == 't'));
+      reg_num++;
+      field_len++;
+      break;
+    case 's':  // Core registers that are (w)sp rather than zr.
+      VIXL_ASSERT((reg_prefix == 'W') || (reg_prefix == 'X'));
+      reg_num = (reg_num == kZeroRegCode) ? kSPRegInternalCode : reg_num;
+      field_len++;
+      break;
+  }
+
+  VIXL_ASSERT(reg_num != UINT_MAX);
+  return std::make_pair(reg_num, field_len);
+}
+
+int Disassembler::SubstituteRegisterField(const Instruction *instr,
+                                          const char *format) {
+  unsigned field_len = 1;  // Initially, count only the first character.
+
+  // The first character of the register format field, eg R, X, S, etc.
+  char reg_prefix = format[0];
+
+  // Pointer to the character after the prefix. This may be one of the standard
+  // symbols representing a register encoding, or a two digit bit position,
+  // handled by the following code.
+  const char *reg_field = &format[1];
+
+  if (reg_prefix == 'R') {
+    bool is_x = instr->GetSixtyFourBits();
+    if (strspn(reg_field, "0123456789") == 2) {  // r20d, r31n, etc.
+      // Core W or X registers where the type is determined by a specified bit
+      // position, eg. 'R20d, 'R05n. This is like the 'Rd syntax, where bit 31
+      // is implicitly used to select between W and X.
+      int bitpos = ((reg_field[0] - '0') * 10) + (reg_field[1] - '0');
+      VIXL_ASSERT(bitpos <= 31);
+      is_x = (instr->ExtractBit(bitpos) == 1);
+      reg_field = &format[3];
+      field_len += 2;
     }
-    default:
-      VIXL_UNREACHABLE();
+    reg_prefix = is_x ? 'X' : 'W';
   }
-  // Increase field length for registers tagged as stack.
-  if (format[1] != '(' && format[2] == 's') {
-    field_len = 3;
+  std::pair<unsigned, unsigned> rn =
+      GetRegNumForField(instr, reg_prefix, reg_field);
+  unsigned reg_num = rn.first;
+  field_len += rn.second;
+
+  if (reg_field[0] == 'm') {
+    switch (reg_field[1]) {
+      // Handle registers tagged with b (bytes), z (instruction), or
+      // r (registers), used for address updates in NEON load/store
+      // instructions.
+      case 'r':
+      case 'b':
+      case 'z': {
+        VIXL_ASSERT(reg_prefix == 'X');
+        field_len = 3;
+        char *eimm;
+        int imm = static_cast<int>(strtol(&reg_field[2], &eimm, 10));
+        field_len += eimm - &reg_field[2];
+        if (reg_num == 31) {
+          switch (reg_field[1]) {
+            case 'z':
+              imm *= (1 << instr->GetNEONLSSize());
+              break;
+            case 'r':
+              imm *= (instr->GetNEONQ() == 0) ? kDRegSizeInBytes
+                                              : kQRegSizeInBytes;
+              break;
+            case 'b':
+              break;
+          }
+          AppendToOutput("#%d", imm);
+          return field_len;
+        }
+        break;
+      }
+    }
+  }
 
   CPURegister::RegisterType reg_type = CPURegister::kRegister;
   unsigned reg_size = kXRegSize;
 
-  switch (reg_prefix) {
-    case 'R':
-      reg_prefix = instr->GetSixtyFourBits() ? 'X' : 'W';
-      break;
-    case 'F':
-      switch (instr->GetFPType()) {
-        case 3:
-          reg_prefix = 'H';
-          break;
-        case 0:
-          reg_prefix = 'S';
-          break;
-        default:
-          reg_prefix = 'D';
-      }
+  if (reg_prefix == 'F') {
+    switch (instr->GetFPType()) {
+      case 3:
+        reg_prefix = 'H';
+        break;
+      case 0:
+        reg_prefix = 'S';
+        break;
+      default:
+        reg_prefix = 'D';
+    }
   }
 
   switch (reg_prefix) {
@@ -5295,22 +9897,51 @@ int Disassembler::SubstituteRegisterField(const Instruction *instr,
       reg_size = kQRegSize;
       break;
     case 'V':
+      if (reg_field[1] == 'v') {
+        reg_type = CPURegister::kVRegister;
+        reg_size = 1 << (instr->GetSVESize() + 3);
+        field_len++;
+        break;
+      }
       AppendToOutput("v%d", reg_num);
       return field_len;
+    case 'Z':
+      AppendToOutput("z%d", reg_num);
+      return field_len;
     default:
       VIXL_UNREACHABLE();
   }
 
-  if ((reg_type == CPURegister::kRegister) && (reg_num == kZeroRegCode) &&
-      (format[2] == 's')) {
-    reg_num = kSPRegInternalCode;
-  }
-
   AppendRegisterNameToOutput(instr, CPURegister(reg_num, reg_size, reg_type));
 
   return field_len;
 }
 
+int Disassembler::SubstitutePredicateRegisterField(const Instruction *instr,
+                                                   const char *format) {
+  VIXL_ASSERT(format[0] == 'P');
+  switch (format[1]) {
+    // This field only supports P registers that are always encoded in the
+    // same position.
+    case 'd':
+    case 't':
+      AppendToOutput("p%u", instr->GetPt());
+      break;
+    case 'n':
+      AppendToOutput("p%u", instr->GetPn());
+      break;
+    case 'm':
+      AppendToOutput("p%u", instr->GetPm());
+      break;
+    case 'g':
+      VIXL_ASSERT(format[2] == 'l');
+      AppendToOutput("p%u", instr->GetPgLow8());
+      return 3;
+    default:
+      VIXL_UNREACHABLE();
+  }
+  return 2;
+}
 
 int Disassembler::SubstituteImmediateField(const Instruction *instr,
                                            const char *format) {
@@ -5391,36 +10022,92 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
       return 6;
     }
     case 'A': {  // IAddSub.
-      VIXL_ASSERT(instr->GetShiftAddSub() <= 1);
-      int64_t imm = instr->GetImmAddSub() << (12 * instr->GetShiftAddSub());
+      int64_t imm = instr->GetImmAddSub() << (12 * instr->GetImmAddSubShift());
       AppendToOutput("#0x%" PRIx64 " (%" PRId64 ")", imm, imm);
       return 7;
     }
-    case 'F': {  // IFPHalf, IFPSingle, IFPDouble, or IFPFBits.
-      if (format[3] == 'F') {  // IFPFbits.
-        AppendToOutput("#%" PRId32, 64 - instr->GetFPScale());
-        return 8;
-      } else {
-        AppendToOutput("#0x%" PRIx32 " (%.4f)",
-                       instr->GetImmFP(),
-                       format[3] == 'H'
-                           ? FPToFloat(instr->GetImmFP16(), kIgnoreDefaultNaN)
-                           : (format[3] == 'S') ? instr->GetImmFP32()
-                                                : instr->GetImmFP64());
-        if (format[3] == 'H') {
-          return 7;
-        } else {
-          return 9;
-        }
+    case 'F': {  // IFP, IFPNeon, IFPSve or IFPFBits.
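+      // All of these variants print an 8-bit encoded FP immediate; they
+      // differ only in which instruction field holds it (the NEON abcdefgh
+      // field for IFPNeon, bits <12:5> for IFPSve, and the scalar ImmFP
+      // field otherwise). IFPFBits is the exception: it prints the fbits
+      // operand used by fixed-point conversions.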
+ int imm8 = 0; + int len = strlen("IFP"); + switch (format[3]) { + case 'F': + VIXL_ASSERT(strncmp(format, "IFPFBits", strlen("IFPFBits")) == 0); + AppendToOutput("#%" PRId32, 64 - instr->GetFPScale()); + return strlen("IFPFBits"); + case 'N': + VIXL_ASSERT(strncmp(format, "IFPNeon", strlen("IFPNeon")) == 0); + imm8 = instr->GetImmNEONabcdefgh(); + len += strlen("Neon"); + break; + case 'S': + VIXL_ASSERT(strncmp(format, "IFPSve", strlen("IFPSve")) == 0); + imm8 = instr->ExtractBits(12, 5); + len += strlen("Sve"); + break; + default: + VIXL_ASSERT(strncmp(format, "IFP", strlen("IFP")) == 0); + imm8 = instr->GetImmFP(); + break; } + AppendToOutput("#0x%" PRIx32 " (%.4f)", + imm8, + Instruction::Imm8ToFP32(imm8)); + return len; } case 'H': { // IH - ImmHint AppendToOutput("#%" PRId32, instr->GetImmHint()); return 2; } case 'T': { // ITri - Immediate Triangular Encoded. - AppendToOutput("#0x%" PRIx64, instr->GetImmLogical()); - return 4; + if (format[4] == 'S') { + VIXL_ASSERT((format[5] == 'v') && (format[6] == 'e')); + switch (format[7]) { + case 'l': + // SVE logical immediate encoding. + AppendToOutput("#0x%" PRIx64, instr->GetSVEImmLogical()); + return 8; + case 'p': { + // SVE predicated shift immediate encoding, lsl. + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ true); + int lane_bits = 8 << shift_and_lane_size.second; + AppendToOutput("#%" PRId32, lane_bits - shift_and_lane_size.first); + return 8; + } + case 'q': { + // SVE predicated shift immediate encoding, asr and lsr. + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ true); + AppendToOutput("#%" PRId32, shift_and_lane_size.first); + return 8; + } + case 'r': { + // SVE unpredicated shift immediate encoding, lsl. + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ false); + int lane_bits = 8 << shift_and_lane_size.second; + AppendToOutput("#%" PRId32, lane_bits - shift_and_lane_size.first); + return 8; + } + case 's': { + // SVE unpredicated shift immediate encoding, asr and lsr. + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ false); + AppendToOutput("#%" PRId32, shift_and_lane_size.first); + return 8; + } + default: + VIXL_UNREACHABLE(); + return 0; + } + } else { + AppendToOutput("#0x%" PRIx64, instr->GetImmLogical()); + return 4; + } } case 'N': { // INzcv. int nzcv = (instr->GetNzcv() << Flags_offset); @@ -5442,12 +10129,21 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, AppendToOutput("#%" PRId32, instr->GetImmS()); return 8; } - case 'S': { // IS - Test and branch bit. + case 't': { // It - Test and branch bit. AppendToOutput("#%" PRId32, (instr->GetImmTestBranchBit5() << 5) | instr->GetImmTestBranchBit40()); return 2; } + case 'S': { // ISveSvl - SVE 'mul vl' immediate for structured ld/st. + VIXL_ASSERT(strncmp(format, "ISveSvl", 7) == 0); + int imm = instr->ExtractSignedBits(19, 16); + if (imm != 0) { + int reg_count = instr->ExtractBits(22, 21) + 1; + AppendToOutput(", #%d, mul vl", imm * reg_count); + } + return 7; + } case 's': { // Is - Shift (immediate). switch (format[2]) { case '1': { // Is1 - SSHR. 
@@ -5539,6 +10235,13 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, } } return 0; + } else if (strncmp(format, + "IVInsSVEIndex", + strlen("IVInsSVEIndex")) == 0) { + std::pair<int, int> index_and_lane_size = + instr->GetSVEPermuteIndexAndLaneSizeLog2(); + AppendToOutput("%d", index_and_lane_size.first); + return strlen("IVInsSVEIndex"); } VIXL_FALLTHROUGH(); } @@ -5547,27 +10250,7 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, return 9; } case 'M': { // Modified Immediate cases. - if (strncmp(format, "IVMIImmFPHalf", strlen("IVMIImmFPHalf")) == 0) { - AppendToOutput("#0x%" PRIx32 " (%.4f)", - instr->GetImmNEONabcdefgh(), - FPToFloat(instr->GetImmNEONFP16(), - kIgnoreDefaultNaN)); - return strlen("IVMIImmFPHalf"); - } else if (strncmp(format, - "IVMIImmFPSingle", - strlen("IVMIImmFPSingle")) == 0) { - AppendToOutput("#0x%" PRIx32 " (%.4f)", - instr->GetImmNEONabcdefgh(), - instr->GetImmNEONFP32()); - return strlen("IVMIImmFPSingle"); - } else if (strncmp(format, - "IVMIImmFPDouble", - strlen("IVMIImmFPDouble")) == 0) { - AppendToOutput("#0x%" PRIx32 " (%.4f)", - instr->GetImmNEONabcdefgh(), - instr->GetImmNEONFP64()); - return strlen("IVMIImmFPDouble"); - } else if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) { + if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) { uint64_t imm8 = instr->GetImmNEONabcdefgh(); AppendToOutput("#0x%" PRIx64, imm8); return strlen("IVMIImm8"); @@ -5647,6 +10330,48 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, } } } + case 'p': { // Ipc - SVE predicate constraint specifier. + VIXL_ASSERT(format[2] == 'c'); + unsigned pattern = instr->GetImmSVEPredicateConstraint(); + switch (pattern) { + // VL1-VL8 are encoded directly. + case SVE_VL1: + case SVE_VL2: + case SVE_VL3: + case SVE_VL4: + case SVE_VL5: + case SVE_VL6: + case SVE_VL7: + case SVE_VL8: + AppendToOutput("vl%u", pattern); + break; + // VL16-VL256 are encoded as log2(N) + c. + case SVE_VL16: + case SVE_VL32: + case SVE_VL64: + case SVE_VL128: + case SVE_VL256: + AppendToOutput("vl%u", 16 << (pattern - SVE_VL16)); + break; + // Special cases. + case SVE_POW2: + AppendToOutput("pow2"); + break; + case SVE_MUL4: + AppendToOutput("mul4"); + break; + case SVE_MUL3: + AppendToOutput("mul3"); + break; + case SVE_ALL: + AppendToOutput("all"); + break; + default: + AppendToOutput("#0x%x", pattern); + break; + } + return 3; + } default: { VIXL_UNIMPLEMENTED(); return 0; @@ -5736,11 +10461,11 @@ int Disassembler::SubstituteShiftField(const Instruction *instr, VIXL_ASSERT(instr->GetShiftDP() <= 0x3); switch (format[1]) { - case 'D': { // HDP. + case 'D': { // NDP. VIXL_ASSERT(instr->GetShiftDP() != ROR); VIXL_FALLTHROUGH(); } - case 'L': { // HLo. + case 'L': { // NLo. if (instr->GetImmDPShift() != 0) { const char *shift_type[] = {"lsl", "lsr", "asr", "ror"}; AppendToOutput(", %s #%" PRId32, @@ -5749,6 +10474,14 @@ int Disassembler::SubstituteShiftField(const Instruction *instr, } return 3; } + case 'S': { // NSveS (SVE structured load/store indexing shift). 
+ VIXL_ASSERT(strncmp(format, "NSveS", 5) == 0); + int msz = instr->ExtractBits(24, 23); + if (msz > 0) { + AppendToOutput(", lsl #%d", msz); + } + return 5; + } default: VIXL_UNIMPLEMENTED(); return 0; @@ -5919,30 +10652,43 @@ int Disassembler::SubstituteLSRegOffsetField(const Instruction *instr, int Disassembler::SubstitutePrefetchField(const Instruction *instr, const char *format) { - VIXL_ASSERT(format[0] == 'P'); + VIXL_ASSERT(format[0] == 'p'); USE(format); - static const char *hints[] = {"ld", "li", "st"}; + bool is_sve = + (strncmp(format, "prefSVEOp", strlen("prefSVEOp")) == 0) ? true : false; + int placeholder_length = is_sve ? 9 : 6; static const char *stream_options[] = {"keep", "strm"}; - unsigned hint = instr->GetPrefetchHint(); + auto get_hints = [](bool is_sve) -> std::vector<std::string> { + static const std::vector<std::string> sve_hints = {"ld", "st"}; + static const std::vector<std::string> core_hints = {"ld", "li", "st"}; + return (is_sve) ? sve_hints : core_hints; + }; + + std::vector<std::string> hints = get_hints(is_sve); + unsigned hint = + is_sve ? instr->GetSVEPrefetchHint() : instr->GetPrefetchHint(); unsigned target = instr->GetPrefetchTarget() + 1; unsigned stream = instr->GetPrefetchStream(); - if ((hint >= ArrayLength(hints)) || (target > 3)) { + if ((hint >= hints.size()) || (target > 3)) { // Unallocated prefetch operations. - int prefetch_mode = instr->GetImmPrefetchOperation(); - AppendToOutput("#0b%c%c%c%c%c", - (prefetch_mode & (1 << 4)) ? '1' : '0', - (prefetch_mode & (1 << 3)) ? '1' : '0', - (prefetch_mode & (1 << 2)) ? '1' : '0', - (prefetch_mode & (1 << 1)) ? '1' : '0', - (prefetch_mode & (1 << 0)) ? '1' : '0'); + if (is_sve) { + std::bitset<4> prefetch_mode(instr->GetSVEImmPrefetchOperation()); + AppendToOutput("#0b%s", prefetch_mode.to_string().c_str()); + } else { + std::bitset<5> prefetch_mode(instr->GetImmPrefetchOperation()); + AppendToOutput("#0b%s", prefetch_mode.to_string().c_str()); + } } else { VIXL_ASSERT(stream < ArrayLength(stream_options)); - AppendToOutput("p%sl%d%s", hints[hint], target, stream_options[stream]); + AppendToOutput("p%sl%d%s", + hints[hint].c_str(), + target, + stream_options[stream]); } - return 6; + return placeholder_length; } int Disassembler::SubstituteBarrierField(const Instruction *instr, @@ -5997,6 +10743,159 @@ int Disassembler::SubstituteCrField(const Instruction *instr, return 2; } +int Disassembler::SubstituteIntField(const Instruction *instr, + const char *format) { + VIXL_ASSERT((format[0] == 'u') || (format[0] == 's')); + + // A generic signed or unsigned int field uses a placeholder of the form + // 'sAABB and 'uAABB respectively where AA and BB are two digit bit positions + // between 00 and 31, and AA >= BB. The placeholder is substituted with the + // decimal integer represented by the bits in the instruction between + // positions AA and BB inclusive. + // + // In addition, split fields can be represented using 'sAABB:CCDD, where CCDD + // become the least-significant bits of the result, and bit AA is the sign bit + // (if 's is used). + int32_t bits = 0; + int width = 0; + const char *c = format; + do { + c++; // Skip the 'u', 's' or ':'. + VIXL_ASSERT(strspn(c, "0123456789") == 4); + int msb = ((c[0] - '0') * 10) + (c[1] - '0'); + int lsb = ((c[2] - '0') * 10) + (c[3] - '0'); + c += 4; // Skip the characters we just read. 
+    int chunk_width = msb - lsb + 1;
+    VIXL_ASSERT((chunk_width > 0) && (chunk_width < 32));
+    bits = (bits << chunk_width) | (instr->ExtractBits(msb, lsb));
+    width += chunk_width;
+  } while (*c == ':');
+  VIXL_ASSERT(IsUintN(width, bits));
+
+  if (format[0] == 's') {
+    bits = ExtractSignedBitfield32(width - 1, 0, bits);
+  }
+
+  if (*c == '+') {
+    // A "+n" trailing the format specifier indicates the extracted value
+    // should be incremented by n. This is for cases where the encoding is
+    // zero-based, but the range of values is not, eg. values [1, 16] encoded
+    // as [0, 15].
+    char *new_c;
+    uint64_t value = strtoul(c + 1, &new_c, 10);
+    c = new_c;
+    VIXL_ASSERT(IsInt32(value));
+    bits += value;
+  } else if (*c == '*') {
+    // Similarly, a "*n" trailing the format specifier indicates the extracted
+    // value should be multiplied by n. This is for cases where the encoded
+    // immediate is scaled, for example by access size.
+    char *new_c;
+    uint64_t value = strtoul(c + 1, &new_c, 10);
+    c = new_c;
+    VIXL_ASSERT(IsInt32(value));
+    bits *= value;
+  }
+
+  AppendToOutput("%d", bits);
+
+  return static_cast<int>(c - format);
+}
+
+int Disassembler::SubstituteSVESize(const Instruction *instr,
+                                    const char *format) {
+  USE(format);
+  VIXL_ASSERT(format[0] == 't');
+
+  static const char sizes[] = {'b', 'h', 's', 'd', 'q'};
+  // TODO: only the most common case for <size> is supported at the moment,
+  // and even then, the RESERVED values are handled as if they're not
+  // reserved.
+  unsigned size_in_bytes_log2 = instr->GetSVESize();
+  int placeholder_length = 1;
+  switch (format[1]) {
+    case 'l':
+      placeholder_length++;
+      if (format[2] == 's') {
+        // 'tls: Loads and stores
+        size_in_bytes_log2 = instr->ExtractBits(22, 21);
+        placeholder_length++;
+        if (format[3] == 's') {
+          // Sign extension load.
+          unsigned msize = instr->ExtractBits(24, 23);
+          if (msize > size_in_bytes_log2) size_in_bytes_log2 ^= 0x3;
+          placeholder_length++;
+        }
+      } else {
+        // 'tl: Logical operations
+        size_in_bytes_log2 = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
+      }
+      break;
+    case 'm':  // 'tmsz
+      VIXL_ASSERT(strncmp(format, "tmsz", 4) == 0);
+      placeholder_length += 3;
+      size_in_bytes_log2 = instr->ExtractBits(24, 23);
+      break;
+    case 's':
+      if (format[2] == 'z') {
+        VIXL_ASSERT((format[3] == 'x') || (format[3] == 's') ||
+                    (format[3] == 'p'));
+        if (format[3] == 'x') {
+          // 'tszx: Indexes.
+          std::pair<int, int> index_and_lane_size =
+              instr->GetSVEPermuteIndexAndLaneSizeLog2();
+          size_in_bytes_log2 = index_and_lane_size.second;
+        } else if (format[3] == 'p') {
+          // 'tszp: Predicated shifts.
+          std::pair<int, int> shift_and_lane_size =
+              instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
+          size_in_bytes_log2 = shift_and_lane_size.second;
+        } else {
+          // 'tszs: Unpredicated shifts.
+          std::pair<int, int> shift_and_lane_size =
+              instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+          size_in_bytes_log2 = shift_and_lane_size.second;
+        }
+        placeholder_length += 3;  // skip `sz[x|s]`
+      }
+      break;
+    case 'h':
+      // Half size of the lane size field.
+      size_in_bytes_log2 -= 1;
+      placeholder_length++;
+      break;
+    case 'q':
+      // Quarter size of the lane size field.
+ size_in_bytes_log2 -= 2; + placeholder_length++; + break; + default: + break; + } + + VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(sizes)); + AppendToOutput("%c", sizes[size_in_bytes_log2]); + + return placeholder_length; +} + +int Disassembler::SubstituteTernary(const Instruction *instr, + const char *format) { + VIXL_ASSERT((format[0] == '?') && (format[3] == ':')); + + // The ternary substitution of the format "'?bb:TF" is replaced by a single + // character, either T or F, depending on the value of the bit at position + // bb in the instruction. For example, "'?31:xw" is substituted with "x" if + // bit 31 is true, and "w" otherwise. + VIXL_ASSERT(strspn(&format[1], "0123456789") == 2); + char *c; + uint64_t value = strtoul(&format[1], &c, 10); + VIXL_ASSERT(value < (kInstructionSize * kBitsPerByte)); + VIXL_ASSERT((*c == ':') && (strlen(c) >= 3)); // Minimum of ":TF" + c++; + AppendToOutput("%c", c[1 - instr->ExtractBit(static_cast<int>(value))]); + return 6; +} + void Disassembler::ResetOutput() { buffer_pos_ = 0; buffer_[buffer_pos_] = 0; diff --git a/src/aarch64/disasm-aarch64.h b/src/aarch64/disasm-aarch64.h index c650bee9..b59840aa 100644 --- a/src/aarch64/disasm-aarch64.h +++ b/src/aarch64/disasm-aarch64.h @@ -27,6 +27,8 @@ #ifndef VIXL_AARCH64_DISASM_AARCH64_H #define VIXL_AARCH64_DISASM_AARCH64_H +#include <utility> + #include "../globals-vixl.h" #include "../utils-vixl.h" @@ -112,10 +114,13 @@ class Disassembler : public DecoderVisitor { private: void Format(const Instruction* instr, const char* mnemonic, - const char* format); + const char* format0, + const char* format1 = NULL); void Substitute(const Instruction* instr, const char* string); int SubstituteField(const Instruction* instr, const char* format); int SubstituteRegisterField(const Instruction* instr, const char* format); + int SubstitutePredicateRegisterField(const Instruction* instr, + const char* format); int SubstituteImmediateField(const Instruction* instr, const char* format); int SubstituteLiteralField(const Instruction* instr, const char* format); int SubstituteBitfieldImmediateField(const Instruction* instr, @@ -130,6 +135,14 @@ class Disassembler : public DecoderVisitor { int SubstituteBarrierField(const Instruction* instr, const char* format); int SubstituteSysOpField(const Instruction* instr, const char* format); int SubstituteCrField(const Instruction* instr, const char* format); + int SubstituteIntField(const Instruction* instr, const char* format); + int SubstituteSVESize(const Instruction* instr, const char* format); + int SubstituteTernary(const Instruction* instr, const char* format); + + std::pair<unsigned, unsigned> GetRegNumForField(const Instruction* instr, + char reg_prefix, + const char* field); + bool RdIsZROrSP(const Instruction* instr) const { return (instr->GetRd() == kZeroRegCode); } diff --git a/src/aarch64/instructions-aarch64.cc b/src/aarch64/instructions-aarch64.cc index a99a0459..b3e28384 100644 --- a/src/aarch64/instructions-aarch64.cc +++ b/src/aarch64/instructions-aarch64.cc @@ -35,7 +35,8 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size, unsigned width) { VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) || (width == 32)); - VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); + VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) || + (reg_size == kSRegSize) || (reg_size == kDRegSize)); uint64_t result = value & ((UINT64_C(1) << width) - 1); for (unsigned i = width; i < reg_size; i *= 2) { result |= (result << i); @@ -43,6 
+44,503 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size, return result; } +bool Instruction::CanTakeSVEMovprfx(const Instruction* movprfx) const { + bool movprfx_is_predicated = movprfx->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z; + bool movprfx_is_unpredicated = + movprfx->Mask(SVEConstructivePrefix_UnpredicatedMask) == MOVPRFX_z_z; + VIXL_ASSERT(movprfx_is_predicated != movprfx_is_unpredicated); + + int movprfx_zd = movprfx->GetRd(); + int movprfx_pg = movprfx_is_predicated ? movprfx->GetPgLow8() : -1; + VectorFormat movprfx_vform = + movprfx_is_predicated ? movprfx->GetSVEVectorFormat() : kFormatUndefined; + + bool pg_matches_low8 = movprfx_pg == GetPgLow8(); + bool vform_matches = movprfx_vform == GetSVEVectorFormat(); + bool zd_matches = movprfx_zd == GetRd(); + bool zd_matches_zm = movprfx_zd == GetRm(); + bool zd_matches_zn = movprfx_zd == GetRn(); + + switch (Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) { + case AND_z_zi: + case EOR_z_zi: + case ORR_z_zi: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEBitwiseLogical_PredicatedMask)) { + case AND_z_p_zz: + case BIC_z_p_zz: + case EOR_z_p_zz: + case ORR_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEBitwiseShiftByImm_PredicatedMask)) { + case ASRD_z_p_zi: + case ASR_z_p_zi: + case LSL_z_p_zi: + case LSR_z_p_zi: + if (movprfx_is_predicated) { + if (!pg_matches_low8) return false; + unsigned tsz = ExtractBits<0x00c00300>(); + VectorFormat instr_vform = + SVEFormatFromLaneSizeInBytesLog2(HighestSetBitPosition(tsz)); + if (movprfx_vform != instr_vform) return false; + } + return zd_matches; + } + switch (Mask(SVEBitwiseShiftByVector_PredicatedMask)) { + case ASRR_z_p_zz: + case ASR_z_p_zz: + case LSLR_z_p_zz: + case LSL_z_p_zz: + case LSRR_z_p_zz: + case LSR_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) { + case ASR_z_p_zw: + case LSL_z_p_zw: + case LSR_z_p_zw: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEConditionallyBroadcastElementToVectorMask)) { + case CLASTA_z_p_zz: + case CLASTB_z_p_zz: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVECopyFPImm_PredicatedMask)) { + case FCPY_z_p_i: + if (movprfx_is_predicated) { + if (!vform_matches) return false; + if (movprfx_pg != GetRx<19, 16>()) return false; + } + return zd_matches; + } + switch (Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) { + case CPY_z_p_r: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches; + } + switch (Mask(SVECopyIntImm_PredicatedMask)) { + case CPY_z_p_i: + if (movprfx_is_predicated) { + if (!vform_matches) return false; + if (movprfx_pg != GetRx<19, 16>()) return false; + } + // Only the merging form can take movprfx. 
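+      // (Bit 14 is the M bit for CPY_z_p_i: 1 selects /m, 0 selects /z.)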
+ if (ExtractBit(14) == 0) return false; + return zd_matches; + } + switch (Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) { + case CPY_z_p_v: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPArithmeticWithImm_PredicatedMask)) { + case FADD_z_p_zs: + case FMAXNM_z_p_zs: + case FMAX_z_p_zs: + case FMINNM_z_p_zs: + case FMIN_z_p_zs: + case FMUL_z_p_zs: + case FSUBR_z_p_zs: + case FSUB_z_p_zs: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches; + } + switch (Mask(SVEFPArithmetic_PredicatedMask)) { + case FABD_z_p_zz: + case FADD_z_p_zz: + case FDIVR_z_p_zz: + case FDIV_z_p_zz: + case FMAXNM_z_p_zz: + case FMAX_z_p_zz: + case FMINNM_z_p_zz: + case FMIN_z_p_zz: + case FMULX_z_p_zz: + case FMUL_z_p_zz: + case FSCALE_z_p_zz: + case FSUBR_z_p_zz: + case FSUB_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEFPComplexAdditionMask)) { + case FCADD_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEFPComplexMulAddIndexMask)) { + case FCMLA_z_zzzi_h: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<18, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + case FCMLA_z_zzzi_s: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<19, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPComplexMulAddMask)) { + case FCMLA_z_p_zzz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zm && !zd_matches_zn; + } + switch (Mask(SVEFPConvertPrecisionMask)) { + case FCVT_z_p_z_d2h: + case FCVT_z_p_z_d2s: + case FCVT_z_p_z_h2d: + case FCVT_z_p_z_h2s: + case FCVT_z_p_z_s2d: + case FCVT_z_p_z_s2h: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPConvertToIntMask)) { + case FCVTZS_z_p_z_d2w: + case FCVTZS_z_p_z_d2x: + case FCVTZS_z_p_z_fp162h: + case FCVTZS_z_p_z_fp162w: + case FCVTZS_z_p_z_fp162x: + case FCVTZS_z_p_z_s2w: + case FCVTZS_z_p_z_s2x: + case FCVTZU_z_p_z_d2w: + case FCVTZU_z_p_z_d2x: + case FCVTZU_z_p_z_fp162h: + case FCVTZU_z_p_z_fp162w: + case FCVTZU_z_p_z_fp162x: + case FCVTZU_z_p_z_s2w: + case FCVTZU_z_p_z_s2x: + if (movprfx_is_predicated) { + if (!pg_matches_low8) return false; + // The movprfx element size must match the instruction's maximum encoded + // element size. We have to partially decode the opc and opc2 fields to + // find this. 
+ unsigned opc = ExtractBits(23, 22); + unsigned opc2 = ExtractBits(18, 17); + VectorFormat instr_vform = + SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2)); + if (movprfx_vform != instr_vform) return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPMulAddIndexMask)) { + case FMLA_z_zzzi_h: + case FMLA_z_zzzi_h_i3h: + case FMLA_z_zzzi_s: + case FMLS_z_zzzi_h: + case FMLS_z_zzzi_h_i3h: + case FMLS_z_zzzi_s: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<18, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + case FMLA_z_zzzi_d: + case FMLS_z_zzzi_d: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<19, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPMulAddMask)) { + case FMAD_z_p_zzz: + case FMSB_z_p_zzz: + case FNMAD_z_p_zzz: + case FNMSB_z_p_zzz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<20, 16>()) return false; + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + case FMLA_z_p_zzz: + case FMLS_z_p_zzz: + case FNMLA_z_p_zzz: + case FNMLS_z_p_zzz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zm && !zd_matches_zn; + } + switch (Mask(SVEFPRoundToIntegralValueMask)) { + case FRINTA_z_p_z: + case FRINTI_z_p_z: + case FRINTM_z_p_z: + case FRINTN_z_p_z: + case FRINTP_z_p_z: + case FRINTX_z_p_z: + case FRINTZ_z_p_z: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPTrigMulAddCoefficientMask)) { + case FTMAD_z_zzi: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEFPUnaryOpMask)) { + case FRECPX_z_p_z: + case FSQRT_z_p_z: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEIncDecByPredicateCountMask)) { + case DECP_z_p_z: + case INCP_z_p_z: + case SQDECP_z_p_z: + case SQINCP_z_p_z: + case UQDECP_z_p_z: + case UQINCP_z_p_z: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEIncDecVectorByElementCountMask)) { + case DECD_z_zs: + case DECH_z_zs: + case DECW_z_zs: + case INCD_z_zs: + case INCH_z_zs: + case INCW_z_zs: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEInsertGeneralRegisterMask)) { + case INSR_z_r: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEInsertSIMDFPScalarRegisterMask)) { + case INSR_z_v: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEIntAddSubtractImm_UnpredicatedMask)) { + case ADD_z_zi: + case SQADD_z_zi: + case SQSUB_z_zi: + case SUBR_z_zi: + case SUB_z_zi: + case UQADD_z_zi: + case UQSUB_z_zi: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEIntAddSubtractVectors_PredicatedMask)) { + case ADD_z_p_zz: + case SUBR_z_p_zz: + case SUB_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. 
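+      // (Bits <9:5> hold the Zm operand in these destructive forms, so
+      // this rejects a movprfx destination that aliases a source.)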
+ if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEIntConvertToFPMask)) { + case SCVTF_z_p_z_h2fp16: + case SCVTF_z_p_z_w2d: + case SCVTF_z_p_z_w2fp16: + case SCVTF_z_p_z_w2s: + case SCVTF_z_p_z_x2d: + case SCVTF_z_p_z_x2fp16: + case SCVTF_z_p_z_x2s: + case UCVTF_z_p_z_h2fp16: + case UCVTF_z_p_z_w2d: + case UCVTF_z_p_z_w2fp16: + case UCVTF_z_p_z_w2s: + case UCVTF_z_p_z_x2d: + case UCVTF_z_p_z_x2fp16: + case UCVTF_z_p_z_x2s: + if (movprfx_is_predicated) { + if (!pg_matches_low8) return false; + // The movprfx element size must match the instruction's maximum encoded + // element size. We have to partially decode the opc and opc2 fields to + // find this. + unsigned opc = ExtractBits(23, 22); + unsigned opc2 = ExtractBits(18, 17); + VectorFormat instr_vform = + SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2)); + if (movprfx_vform != instr_vform) return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEIntDivideVectors_PredicatedMask)) { + case SDIVR_z_p_zz: + case SDIV_z_p_zz: + case UDIVR_z_p_zz: + case UDIV_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEIntMinMaxDifference_PredicatedMask)) { + case SABD_z_p_zz: + case SMAX_z_p_zz: + case SMIN_z_p_zz: + case UABD_z_p_zz: + case UMAX_z_p_zz: + case UMIN_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEIntMinMaxImm_UnpredicatedMask)) { + case SMAX_z_zi: + case SMIN_z_zi: + case UMAX_z_zi: + case UMIN_z_zi: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEIntMulAddPredicatedMask)) { + case MAD_z_p_zzz: + case MSB_z_p_zzz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches && !zd_matches_zm; + case MLA_z_p_zzz: + case MLS_z_p_zzz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zm && !zd_matches_zn; + } + switch (Mask(SVEIntMulAddUnpredicatedMask)) { + case SDOT_z_zzz: + case UDOT_z_zzz: + return movprfx_is_unpredicated && zd_matches && !zd_matches_zm && + !zd_matches_zn; + } + switch (Mask(SVEIntMulImm_UnpredicatedMask)) { + case MUL_z_zi: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEIntMulVectors_PredicatedMask)) { + case MUL_z_p_zz: + case SMULH_z_p_zz: + case UMULH_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. 
+ if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEIntUnaryArithmeticPredicatedMask)) { + case ABS_z_p_z: + case CLS_z_p_z: + case CLZ_z_p_z: + case CNOT_z_p_z: + case CNT_z_p_z: + case FABS_z_p_z: + case FNEG_z_p_z: + case NEG_z_p_z: + case NOT_z_p_z: + case SXTB_z_p_z: + case SXTH_z_p_z: + case SXTW_z_p_z: + case UXTB_z_p_z: + case UXTH_z_p_z: + case UXTW_z_p_z: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEMulIndexMask)) { + case SDOT_z_zzzi_s: + case UDOT_z_zzzi_s: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<18, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + case SDOT_z_zzzi_d: + case UDOT_z_zzzi_d: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<19, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + } + switch (Mask(SVEPermuteVectorExtractMask)) { + case EXT_z_zi_des: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEReverseWithinElementsMask)) { + case RBIT_z_p_z: + case REVB_z_z: + case REVH_z_z: + case REVW_z_z: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVESaturatingIncDecVectorByElementCountMask)) { + case SQDECD_z_zs: + case SQDECH_z_zs: + case SQDECW_z_zs: + case SQINCD_z_zs: + case SQINCH_z_zs: + case SQINCW_z_zs: + case UQDECD_z_zs: + case UQDECH_z_zs: + case UQDECW_z_zs: + case UQINCD_z_zs: + case UQINCH_z_zs: + case UQINCW_z_zs: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEVectorSplice_DestructiveMask)) { + case SPLICE_z_p_zz_des: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return movprfx_is_unpredicated && zd_matches; + } + return false; +} // NOLINT(readability/fn_size) bool Instruction::IsLoad() const { if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) { @@ -103,6 +601,16 @@ bool Instruction::IsStore() const { } +std::pair<int, int> Instruction::GetSVEPermuteIndexAndLaneSizeLog2() const { + uint32_t imm_2 = ExtractBits<0x00C00000>(); + uint32_t tsz_5 = ExtractBits<0x001F0000>(); + uint32_t imm_7 = (imm_2 << 5) | tsz_5; + int lane_size_in_byte_log_2 = std::min(CountTrailingZeros(tsz_5), 5); + int index = ExtractUnsignedBitfield32(6, lane_size_in_byte_log_2 + 1, imm_7); + return std::make_pair(index, lane_size_in_byte_log_2); +} + + // Logical immediates can't encode zero, so a return value of zero is used to // indicate a failure case. Specifically, where the constraints on imm_s are // not met. @@ -111,7 +619,108 @@ uint64_t Instruction::GetImmLogical() const { int32_t n = GetBitN(); int32_t imm_s = GetImmSetBits(); int32_t imm_r = GetImmRotate(); + return DecodeImmBitMask(n, imm_s, imm_r, reg_size); +} + +// Logical immediates can't encode zero, so a return value of zero is used to +// indicate a failure case. Specifically, where the constraints on imm_s are +// not met. 
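+// The SVE variant below differs from GetImmLogical() only in that the lane
+// size is itself decoded from the {n, imm_s} fields; see
+// GetSVEBitwiseImmLaneSizeInBytesLog2() for that mapping.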
+uint64_t Instruction::GetSVEImmLogical() const {
+  int n = GetSVEBitN();
+  int imm_s = GetSVEImmSetBits();
+  int imm_r = GetSVEImmRotate();
+  int lane_size_in_bytes_log2 = GetSVEBitwiseImmLaneSizeInBytesLog2();
+  switch (lane_size_in_bytes_log2) {
+    case kDRegSizeInBytesLog2:
+    case kSRegSizeInBytesLog2:
+    case kHRegSizeInBytesLog2:
+    case kBRegSizeInBytesLog2: {
+      int lane_size_in_bits = 1 << (lane_size_in_bytes_log2 + 3);
+      return DecodeImmBitMask(n, imm_s, imm_r, lane_size_in_bits);
+    }
+    default:
+      return 0;
+  }
+}
+
+std::pair<int, int> Instruction::GetSVEImmShiftAndLaneSizeLog2(
+    bool is_predicated) const {
+  Instr tsize =
+      is_predicated ? ExtractBits<0x00C00300>() : ExtractBits<0x00D80000>();
+  Instr imm_3 =
+      is_predicated ? ExtractBits<0x000000E0>() : ExtractBits<0x00070000>();
+  if (tsize == 0) {
+    // The bit field `tsize` means undefined if it is zero, so return a
+    // convenience value kWMinInt to indicate a failure case.
+    return std::make_pair(kWMinInt, kWMinInt);
+  }
+
+  int lane_size_in_bytes_log_2 = 32 - CountLeadingZeros(tsize, 32) - 1;
+  int esize = (1 << lane_size_in_bytes_log_2) * kBitsPerByte;
+  int shift = (2 * esize) - ((tsize << 3) | imm_3);
+  return std::make_pair(shift, lane_size_in_bytes_log_2);
+}
+
+int Instruction::GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb) const {
+  Instr dtype_h = ExtractBits(dtype_h_lsb + 1, dtype_h_lsb);
+  if (is_signed) {
+    dtype_h = dtype_h ^ 0x3;
+  }
+  return dtype_h;
+}
+
+int Instruction::GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb) const {
+  Instr dtype_l = ExtractBits(dtype_l_lsb + 1, dtype_l_lsb);
+  if (is_signed) {
+    dtype_l = dtype_l ^ 0x3;
+  }
+  return dtype_l;
+}
+
+int Instruction::GetSVEBitwiseImmLaneSizeInBytesLog2() const {
+  int n = GetSVEBitN();
+  int imm_s = GetSVEImmSetBits();
+  unsigned type_bitset =
+      (n << SVEImmSetBits_width) | (~imm_s & GetUintMask(SVEImmSetBits_width));
+
+  // A lane size is constructed from the n and imm_s bits according to
+  // the following table:
+  //
+  //  N   imms   size
+  //  0  0xxxxx   32
+  //  0  10xxxx   16
+  //  0  110xxx    8
+  //  0  1110xx    8
+  //  0  11110x    8
+  //  1  xxxxxx   64
+
+  if (type_bitset == 0) {
+    // Bail out early since `HighestSetBitPosition` doesn't accept zero
+    // value input.
+    return -1;
+  }
+
+  switch (HighestSetBitPosition(type_bitset)) {
+    case 6:
+      return kDRegSizeInBytesLog2;
+    case 5:
+      return kSRegSizeInBytesLog2;
+    case 4:
+      return kHRegSizeInBytesLog2;
+    case 3:
+    case 2:
+    case 1:
+      return kBRegSizeInBytesLog2;
+    default:
+      // RESERVED encoding.
+ return -1; + } +} + +uint64_t Instruction::DecodeImmBitMask(int32_t n, + int32_t imm_s, + int32_t imm_r, + int32_t size) const { // An integer is constructed from the n, imm_s and imm_r bits according to // the following table: // @@ -146,7 +755,7 @@ uint64_t Instruction::GetImmLogical() const { return 0; } uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1; - return RepeatBitsAcrossReg(reg_size, + return RepeatBitsAcrossReg(size, RotateRight(bits, imm_r & mask, width), width); } @@ -397,8 +1006,6 @@ void Instruction::SetImmLLiteral(const Instruction* source) { VectorFormat VectorFormatHalfWidth(VectorFormat vform) { - VIXL_ASSERT(vform == kFormat8H || vform == kFormat4S || vform == kFormat2D || - vform == kFormatH || vform == kFormatS || vform == kFormatD); switch (vform) { case kFormat8H: return kFormat8B; @@ -412,6 +1019,13 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) { return kFormatH; case kFormatD: return kFormatS; + case kFormatVnH: + return kFormatVnB; + case kFormatVnS: + return kFormatVnH; + case kFormatVnD: + return kFormatVnS; + break; default: VIXL_UNREACHABLE(); return kFormatUndefined; @@ -480,6 +1094,12 @@ VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform) { return kFormat2S; case kFormat2D: return kFormat4S; + case kFormatVnH: + return kFormatVnB; + case kFormatVnS: + return kFormatVnH; + case kFormatVnD: + return kFormatVnS; default: VIXL_UNREACHABLE(); return kFormatUndefined; @@ -518,8 +1138,8 @@ VectorFormat VectorFormatHalfLanes(VectorFormat vform) { } -VectorFormat ScalarFormatFromLaneSize(int laneSize) { - switch (laneSize) { +VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits) { + switch (lane_size_in_bits) { case 8: return kFormatB; case 16: @@ -535,6 +1155,69 @@ VectorFormat ScalarFormatFromLaneSize(int laneSize) { } +bool IsSVEFormat(VectorFormat vform) { + switch (vform) { + case kFormatVnB: + case kFormatVnH: + case kFormatVnS: + case kFormatVnD: + case kFormatVnQ: + return true; + default: + return false; + } +} + + +VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes) { + switch (lane_size_in_bytes) { + case 1: + return kFormatVnB; + case 2: + return kFormatVnH; + case 4: + return kFormatVnS; + case 8: + return kFormatVnD; + case 16: + return kFormatVnQ; + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits) { + switch (lane_size_in_bits) { + case 8: + case 16: + case 32: + case 64: + case 128: + return SVEFormatFromLaneSizeInBytes(lane_size_in_bits / kBitsPerByte); + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log2) { + switch (lane_size_in_bytes_log2) { + case 0: + case 1: + case 2: + case 3: + case 4: + return SVEFormatFromLaneSizeInBytes(1 << lane_size_in_bytes_log2); + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + VectorFormat ScalarFormatFromFormat(VectorFormat vform) { return ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); } @@ -542,6 +1225,7 @@ VectorFormat ScalarFormatFromFormat(VectorFormat vform) { unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) { VIXL_ASSERT(vform != kFormatUndefined); + VIXL_ASSERT(!IsSVEFormat(vform)); switch (vform) { case kFormatB: return kBRegSize; @@ -551,14 +1235,19 @@ unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) { case kFormat2H: return kSRegSize; case kFormatD: - return kDRegSize; case kFormat8B: case kFormat4H: case kFormat2S: case 
kFormat1D:
       return kDRegSize;
-    default:
+    case kFormat16B:
+    case kFormat8H:
+    case kFormat4S:
+    case kFormat2D:
       return kQRegSize;
+    default:
+      VIXL_UNREACHABLE();
+      return 0;
   }
 }
@@ -574,20 +1263,26 @@ unsigned LaneSizeInBitsFromFormat(VectorFormat vform) {
     case kFormatB:
     case kFormat8B:
     case kFormat16B:
+    case kFormatVnB:
       return 8;
     case kFormatH:
     case kFormat2H:
     case kFormat4H:
     case kFormat8H:
+    case kFormatVnH:
       return 16;
     case kFormatS:
     case kFormat2S:
     case kFormat4S:
+    case kFormatVnS:
       return 32;
     case kFormatD:
     case kFormat1D:
     case kFormat2D:
+    case kFormatVnD:
       return 64;
+    case kFormatVnQ:
+      return 128;
     default:
       VIXL_UNREACHABLE();
       return 0;
@@ -606,20 +1301,26 @@ int LaneSizeInBytesLog2FromFormat(VectorFormat vform) {
     case kFormatB:
     case kFormat8B:
    case kFormat16B:
+    case kFormatVnB:
      return 0;
    case kFormatH:
    case kFormat2H:
    case kFormat4H:
    case kFormat8H:
+    case kFormatVnH:
      return 1;
    case kFormatS:
    case kFormat2S:
    case kFormat4S:
+    case kFormatVnS:
      return 2;
    case kFormatD:
    case kFormat1D:
    case kFormat2D:
+    case kFormatVnD:
      return 3;
+    case kFormatVnQ:
+      return 4;
    default:
      VIXL_UNREACHABLE();
      return 0;
@@ -697,17 +1398,19 @@ bool IsVectorFormat(VectorFormat vform) {
 
 
 int64_t MaxIntFromFormat(VectorFormat vform) {
-  return INT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
+  int lane_size = LaneSizeInBitsFromFormat(vform);
+  return static_cast<int64_t>(GetUintMask(lane_size) >> 1);
 }
 
 
 int64_t MinIntFromFormat(VectorFormat vform) {
-  return INT64_MIN >> (64 - LaneSizeInBitsFromFormat(vform));
+  return -MaxIntFromFormat(vform) - 1;
 }
 
 
 uint64_t MaxUintFromFormat(VectorFormat vform) {
-  return UINT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
+  return GetUintMask(LaneSizeInBitsFromFormat(vform));
 }
 
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/src/aarch64/instructions-aarch64.h b/src/aarch64/instructions-aarch64.h
index 6d4f96b4..5f56ae16 100644
--- a/src/aarch64/instructions-aarch64.h
+++ b/src/aarch64/instructions-aarch64.h
@@ -81,6 +81,7 @@ const uint64_t kXRegMask = UINT64_C(0xffffffffffffffff);
 const uint64_t kHRegMask = UINT64_C(0xffff);
 const uint64_t kSRegMask = UINT64_C(0xffffffff);
 const uint64_t kDRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kHSignMask = UINT64_C(0x8000);
 const uint64_t kSSignMask = UINT64_C(0x80000000);
 const uint64_t kDSignMask = UINT64_C(0x8000000000000000);
 const uint64_t kWSignMask = UINT64_C(0x80000000);
@@ -116,6 +117,30 @@ VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000));
 
 const uint64_t kTTBRMask = UINT64_C(1) << 55;
 
+// We can't define a static kZRegSize because the size depends on the
+// implementation. However, it is sometimes useful to know the minimum and
+// maximum possible sizes.
+const unsigned kZRegMinSize = 128;
+const unsigned kZRegMinSizeLog2 = 7;
+const unsigned kZRegMinSizeInBytes = kZRegMinSize / 8;
+const unsigned kZRegMinSizeInBytesLog2 = kZRegMinSizeLog2 - 3;
+const unsigned kZRegMaxSize = 2048;
+const unsigned kZRegMaxSizeLog2 = 11;
+const unsigned kZRegMaxSizeInBytes = kZRegMaxSize / 8;
+const unsigned kZRegMaxSizeInBytesLog2 = kZRegMaxSizeLog2 - 3;
+
+// The P register size depends on the Z register size.
+const unsigned kZRegBitsPerPRegBit = kBitsPerByte; +const unsigned kZRegBitsPerPRegBitLog2 = 3; +const unsigned kPRegMinSize = kZRegMinSize / kZRegBitsPerPRegBit; +const unsigned kPRegMinSizeLog2 = kZRegMinSizeLog2 - 3; +const unsigned kPRegMinSizeInBytes = kPRegMinSize / 8; +const unsigned kPRegMinSizeInBytesLog2 = kPRegMinSizeLog2 - 3; +const unsigned kPRegMaxSize = kZRegMaxSize / kZRegBitsPerPRegBit; +const unsigned kPRegMaxSizeLog2 = kZRegMaxSizeLog2 - 3; +const unsigned kPRegMaxSizeInBytes = kPRegMaxSize / 8; +const unsigned kPRegMaxSizeInBytesLog2 = kPRegMaxSizeLog2 - 3; + // Make these moved float constants backwards compatible // with explicit vixl::aarch64:: namespace references. using vixl::kDoubleMantissaBits; @@ -151,6 +176,44 @@ enum AddrMode { Offset, PreIndex, PostIndex }; enum Reg31Mode { Reg31IsStackPointer, Reg31IsZeroRegister }; +enum VectorFormat { + kFormatUndefined = 0xffffffff, + kFormat8B = NEON_8B, + kFormat16B = NEON_16B, + kFormat4H = NEON_4H, + kFormat8H = NEON_8H, + kFormat2S = NEON_2S, + kFormat4S = NEON_4S, + kFormat1D = NEON_1D, + kFormat2D = NEON_2D, + + // Scalar formats. We add the scalar bit to distinguish between scalar and + // vector enumerations; the bit is always set in the encoding of scalar ops + // and always clear for vector ops. Although kFormatD and kFormat1D appear + // to be the same, their meaning is subtly different. The first is a scalar + // operation, the second a vector operation that only affects one lane. + kFormatB = NEON_B | NEONScalar, + kFormatH = NEON_H | NEONScalar, + kFormatS = NEON_S | NEONScalar, + kFormatD = NEON_D | NEONScalar, + + // An artificial value, used to distinguish from NEON format category. + kFormatSVE = 0x0000fffd, + // An artificial value. Q lane size isn't encoded in the usual size field. + kFormatSVEQ = 0x000f0000, + // Vector element width of SVE register with the unknown lane count since + // the vector length is implementation dependent. + kFormatVnB = SVE_B | kFormatSVE, + kFormatVnH = SVE_H | kFormatSVE, + kFormatVnS = SVE_S | kFormatSVE, + kFormatVnD = SVE_D | kFormatSVE, + kFormatVnQ = kFormatSVEQ | kFormatSVE, + + // An artificial value, used by simulator trace tests and a few oddball + // instructions (such as FMLAL). + kFormat2H = 0xfffffffe +}; + // Instructions. --------------------------------------------------------------- class Instruction { @@ -229,6 +292,29 @@ class Instruction { INSTRUCTION_FIELDS_LIST(DEFINE_GETTER) #undef DEFINE_GETTER + template <int msb, int lsb> + int32_t GetRx() const { + // We don't have any register fields wider than five bits, so the result + // will always fit into an int32_t. + VIXL_ASSERT((msb - lsb + 1) <= 5); + return this->ExtractBits(msb, lsb); + } + + VectorFormat GetSVEVectorFormat() const { + switch (Mask(SVESizeFieldMask)) { + case SVE_B: + return kFormatVnB; + case SVE_H: + return kFormatVnH; + case SVE_S: + return kFormatVnS; + case SVE_D: + return kFormatVnD; + } + VIXL_UNREACHABLE(); + return kFormatUndefined; + } + // ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST), // formed from ImmPCRelLo and ImmPCRelHi. 
int GetImmPCRel() const { @@ -254,6 +340,20 @@ class Instruction { VIXL_DEPRECATED("GetImmLogical", uint64_t ImmLogical() const) { return GetImmLogical(); } + uint64_t GetSVEImmLogical() const; + int GetSVEBitwiseImmLaneSizeInBytesLog2() const; + uint64_t DecodeImmBitMask(int32_t n, + int32_t imm_s, + int32_t imm_r, + int32_t size) const; + + std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const; + + std::pair<int, int> GetSVEImmShiftAndLaneSizeLog2(bool is_predicated) const; + + int GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb = 23) const; + + int GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb = 21) const; unsigned GetImmNEONabcdefgh() const; VIXL_DEPRECATED("GetImmNEONabcdefgh", unsigned ImmNEONabcdefgh() const) { @@ -280,6 +380,16 @@ class Instruction { return GetImmNEONFP64(); } + Float16 GetSVEImmFP16() const { return Imm8ToFloat16(ExtractBits(12, 5)); } + + float GetSVEImmFP32() const { return Imm8ToFP32(ExtractBits(12, 5)); } + + double GetSVEImmFP64() const { return Imm8ToFP64(ExtractBits(12, 5)); } + + static Float16 Imm8ToFloat16(uint32_t imm8); + static float Imm8ToFP32(uint32_t imm8); + static double Imm8ToFP64(uint32_t imm8); + unsigned GetSizeLS() const { return CalcLSDataSize(static_cast<LoadStoreOp>(Mask(LoadStoreMask))); } @@ -342,6 +452,9 @@ class Instruction { return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed; } + // True if `this` is valid immediately after the provided movprfx instruction. + bool CanTakeSVEMovprfx(Instruction const* movprfx) const; + bool IsLoad() const; bool IsStore() const; @@ -557,41 +670,12 @@ class Instruction { private: int GetImmBranch() const; - static Float16 Imm8ToFloat16(uint32_t imm8); - static float Imm8ToFP32(uint32_t imm8); - static double Imm8ToFP64(uint32_t imm8); - void SetPCRelImmTarget(const Instruction* target); void SetBranchImmTarget(const Instruction* target); }; -// Functions for handling NEON vector format information. -enum VectorFormat { - kFormatUndefined = 0xffffffff, - kFormat8B = NEON_8B, - kFormat16B = NEON_16B, - kFormat4H = NEON_4H, - kFormat8H = NEON_8H, - kFormat2S = NEON_2S, - kFormat4S = NEON_4S, - kFormat1D = NEON_1D, - kFormat2D = NEON_2D, - - // Scalar formats. We add the scalar bit to distinguish between scalar and - // vector enumerations; the bit is always set in the encoding of scalar ops - // and always clear for vector ops. Although kFormatD and kFormat1D appear - // to be the same, their meaning is subtly different. The first is a scalar - // operation, the second a vector operation that only affects one lane. - kFormatB = NEON_B | NEONScalar, - kFormatH = NEON_H | NEONScalar, - kFormatS = NEON_S | NEONScalar, - kFormatD = NEON_D | NEONScalar, - - // An artificial value, used by simulator trace tests and a few oddball - // instructions (such as FMLAL). - kFormat2H = 0xfffffffe -}; +// Functions for handling NEON and SVE vector format information. 
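A side note on the MaxIntFromFormat/MinIntFromFormat/MaxUintFromFormat rewrite earlier in this patch: deriving the lane extremes from an unsigned mask reads more directly than the old INT64_MAX/INT64_MIN shifts, and presumably also sidesteps right-shifting a negative value, which was implementation-defined before C++20. A self-contained sketch of the same arithmetic (the local GetUintMask stand-in mirrors VIXL's helper of the same name so the example compiles alone):

#include <cassert>
#include <cstdint>

// Local stand-in for VIXL's GetUintMask(), defined here so the sketch is
// self-contained.
static uint64_t GetUintMask(unsigned bits) {
  return (bits >= 64) ? UINT64_MAX : ((UINT64_C(1) << bits) - 1);
}

int main() {
  // Lane extremes for a 16-bit lane, mirroring the rewritten helpers:
  unsigned lane_size = 16;
  int64_t max_int = static_cast<int64_t>(GetUintMask(lane_size) >> 1);
  int64_t min_int = -max_int - 1;
  uint64_t max_uint = GetUintMask(lane_size);
  assert(max_int == 0x7fff);
  assert(min_int == -0x8000);
  assert(max_uint == 0xffff);
  return 0;
}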
const int kMaxLanesPerVector = 16;
@@ -599,12 +683,16 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform);
 VectorFormat VectorFormatDoubleWidth(VectorFormat vform);
 VectorFormat VectorFormatDoubleLanes(VectorFormat vform);
 VectorFormat VectorFormatHalfLanes(VectorFormat vform);
-VectorFormat ScalarFormatFromLaneSize(int lanesize);
+VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits);
 VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform);
 VectorFormat VectorFormatFillQ(VectorFormat vform);
 VectorFormat ScalarFormatFromFormat(VectorFormat vform);
+VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits);
+VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes);
+VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log_2);
 unsigned RegisterSizeInBitsFromFormat(VectorFormat vform);
 unsigned RegisterSizeInBytesFromFormat(VectorFormat vform);
+bool IsSVEFormat(VectorFormat vform);
 // TODO: Make the return types of these functions consistent.
 unsigned LaneSizeInBitsFromFormat(VectorFormat vform);
 int LaneSizeInBytesFromFormat(VectorFormat vform);
diff --git a/src/aarch64/instrument-aarch64.cc b/src/aarch64/instrument-aarch64.cc
deleted file mode 100644
index 7cb6b20e..00000000
--- a/src/aarch64/instrument-aarch64.cc
+++ /dev/null
@@ -1,975 +0,0 @@
-// Copyright 2014, VIXL authors
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-// * Neither the name of ARM Limited nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "instrument-aarch64.h"
-
-namespace vixl {
-namespace aarch64 {
-
-Counter::Counter(const char* name, CounterType type)
-    : count_(0), enabled_(false), type_(type) {
-  VIXL_ASSERT(name != NULL);
-  strncpy(name_, name, kCounterNameMaxLength - 1);
-  // Make sure `name_` is always NULL-terminated, even if the source string
-  // is longer.
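// (Aside on the idiom being deleted here: strncpy() only writes a terminator
// when the source is shorter than the count, so the constructor copies at
// most kCounterNameMaxLength - 1 characters and then stores the '\0' itself.)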
- name_[kCounterNameMaxLength - 1] = '\0'; -} - - -void Counter::Enable() { enabled_ = true; } - - -void Counter::Disable() { enabled_ = false; } - - -bool Counter::IsEnabled() { return enabled_; } - - -void Counter::Increment() { - if (enabled_) { - count_++; - } -} - - -uint64_t Counter::GetCount() { - uint64_t result = count_; - if (type_ == Gauge) { - // If the counter is a Gauge, reset the count after reading. - count_ = 0; - } - return result; -} - - -const char* Counter::GetName() { return name_; } - - -CounterType Counter::GetType() { return type_; } - - -struct CounterDescriptor { - const char* name; - CounterType type; -}; - - -static const CounterDescriptor kCounterList[] = - {{"Instruction", Cumulative}, - - {"Move Immediate", Gauge}, - {"Add/Sub DP", Gauge}, - {"Logical DP", Gauge}, - {"Other Int DP", Gauge}, - {"FP DP", Gauge}, - - {"Conditional Select", Gauge}, - {"Conditional Compare", Gauge}, - - {"Unconditional Branch", Gauge}, - {"Compare and Branch", Gauge}, - {"Test and Branch", Gauge}, - {"Conditional Branch", Gauge}, - - {"Load Integer", Gauge}, - {"Load FP", Gauge}, - {"Load Pair", Gauge}, - {"Load Literal", Gauge}, - - {"Store Integer", Gauge}, - {"Store FP", Gauge}, - {"Store Pair", Gauge}, - - {"PC Addressing", Gauge}, - {"Other", Gauge}, - {"NEON", Gauge}, - {"Crypto", Gauge}}; - - -Instrument::Instrument(const char* datafile, uint64_t sample_period) - : output_stream_(stdout), sample_period_(sample_period) { - // Set up the output stream. If datafile is non-NULL, use that file. If it - // can't be opened, or datafile is NULL, use stdout. - if (datafile != NULL) { - output_stream_ = fopen(datafile, "w"); - if (output_stream_ == NULL) { - printf("Can't open output file %s. Using stdout.\n", datafile); - output_stream_ = stdout; - } - } - - static const int num_counters = - sizeof(kCounterList) / sizeof(CounterDescriptor); - - // Dump an instrumentation description comment at the top of the file. - fprintf(output_stream_, "# counters=%d\n", num_counters); - fprintf(output_stream_, "# sample_period=%" PRIu64 "\n", sample_period_); - - // Construct Counter objects from counter description array. - for (int i = 0; i < num_counters; i++) { - Counter* counter = new Counter(kCounterList[i].name, kCounterList[i].type); - counters_.push_back(counter); - } - - DumpCounterNames(); -} - - -Instrument::~Instrument() { - // Dump any remaining instruction data to the output file. - DumpCounters(); - - // Free all the counter objects. - std::list<Counter*>::iterator it; - for (it = counters_.begin(); it != counters_.end(); it++) { - delete *it; - } - - if (output_stream_ != stdout) { - fclose(output_stream_); - } -} - - -void Instrument::Update() { - // Increment the instruction counter, and dump all counters if a sample period - // has elapsed. - static Counter* counter = GetCounter("Instruction"); - VIXL_ASSERT(counter->GetType() == Cumulative); - counter->Increment(); - - if ((sample_period_ != 0) && counter->IsEnabled() && - (counter->GetCount() % sample_period_) == 0) { - DumpCounters(); - } -} - - -void Instrument::DumpCounters() { - // Iterate through the counter objects, dumping their values to the output - // stream. 
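// (For reference, each dump below is one CSV-style row: every counter is
// printed as "<count>," via fprintf, followed by a single newline, matching
// the "# counters=..." header the constructor writes at the top of the file.)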
-  std::list<Counter*>::const_iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    fprintf(output_stream_, "%" PRIu64 ",", (*it)->GetCount());
-  }
-  fprintf(output_stream_, "\n");
-  fflush(output_stream_);
-}
-
-
-void Instrument::DumpCounterNames() {
-  // Iterate through the counter objects, dumping the counter names to the
-  // output stream.
-  std::list<Counter*>::const_iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    fprintf(output_stream_, "%s,", (*it)->GetName());
-  }
-  fprintf(output_stream_, "\n");
-  fflush(output_stream_);
-}
-
-
-void Instrument::HandleInstrumentationEvent(unsigned event) {
-  switch (event) {
-    case InstrumentStateEnable:
-      Enable();
-      break;
-    case InstrumentStateDisable:
-      Disable();
-      break;
-    default:
-      DumpEventMarker(event);
-  }
-}
-
-
-void Instrument::DumpEventMarker(unsigned marker) {
-  // Dump an event marker to the output stream as a specially formatted comment
-  // line.
-  static Counter* counter = GetCounter("Instruction");
-
-  fprintf(output_stream_,
-          "# %c%c @ %" PRId64 "\n",
-          marker & 0xff,
-          (marker >> 8) & 0xff,
-          counter->GetCount());
-}
-
-
-Counter* Instrument::GetCounter(const char* name) {
-  // Get a Counter object by name from the counter list.
-  std::list<Counter*>::const_iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    if (strcmp((*it)->GetName(), name) == 0) {
-      return *it;
-    }
-  }
-
-  // A Counter by that name does not exist: print an error message to stderr
-  // and the output file, and exit.
-  static const char* error_message =
-      "# Error: Unknown counter \"%s\". Exiting.\n";
-  fprintf(stderr, error_message, name);
-  fprintf(output_stream_, error_message, name);
-  exit(1);
-}
-
-
-void Instrument::Enable() {
-  std::list<Counter*>::iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    (*it)->Enable();
-  }
-}
-
-
-void Instrument::Disable() {
-  std::list<Counter*>::iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    (*it)->Disable();
-  }
-}
-
-
-void Instrument::VisitPCRelAddressing(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("PC Addressing");
-  counter->Increment();
-}
-
-
-void Instrument::VisitAddSubImmediate(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Add/Sub DP");
-  counter->Increment();
-}
-
-
-void Instrument::VisitLogicalImmediate(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Logical DP");
-  counter->Increment();
-}
-
-
-void Instrument::VisitMoveWideImmediate(const Instruction* instr) {
-  Update();
-  static Counter* counter = GetCounter("Move Immediate");
-
-  if (instr->IsMovn() && (instr->GetRd() == kZeroRegCode)) {
-    unsigned imm = instr->GetImmMoveWide();
-    HandleInstrumentationEvent(imm);
-  } else {
-    counter->Increment();
-  }
-}
-
-
-void Instrument::VisitBitfield(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other Int DP");
-  counter->Increment();
-}
-
-
-void Instrument::VisitExtract(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other Int DP");
-  counter->Increment();
-}
-
-
-void Instrument::VisitUnconditionalBranch(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Unconditional Branch");
-  counter->Increment();
-}
-
-
-void Instrument::VisitUnconditionalBranchToRegister(const Instruction* instr) {
-  USE(instr);
- Update(); - static Counter* counter = GetCounter("Unconditional Branch"); - counter->Increment(); -} - - -void Instrument::VisitCompareBranch(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Compare and Branch"); - counter->Increment(); -} - - -void Instrument::VisitTestBranch(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Test and Branch"); - counter->Increment(); -} - - -void Instrument::VisitConditionalBranch(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Conditional Branch"); - counter->Increment(); -} - - -void Instrument::VisitSystem(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitException(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::InstrumentLoadStorePair(const Instruction* instr) { - static Counter* load_pair_counter = GetCounter("Load Pair"); - static Counter* store_pair_counter = GetCounter("Store Pair"); - - if (instr->Mask(LoadStorePairLBit) != 0) { - load_pair_counter->Increment(); - } else { - store_pair_counter->Increment(); - } -} - - -void Instrument::VisitLoadStorePairPostIndex(const Instruction* instr) { - Update(); - InstrumentLoadStorePair(instr); -} - - -void Instrument::VisitLoadStorePairOffset(const Instruction* instr) { - Update(); - InstrumentLoadStorePair(instr); -} - - -void Instrument::VisitLoadStorePairPreIndex(const Instruction* instr) { - Update(); - InstrumentLoadStorePair(instr); -} - - -void Instrument::VisitLoadStorePairNonTemporal(const Instruction* instr) { - Update(); - InstrumentLoadStorePair(instr); -} - - -void Instrument::VisitLoadStoreExclusive(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitAtomicMemory(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitLoadLiteral(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Load Literal"); - counter->Increment(); -} - - -void Instrument::VisitLoadStorePAC(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Load Integer"); - counter->Increment(); -} - - -void Instrument::InstrumentLoadStore(const Instruction* instr) { - static Counter* load_int_counter = GetCounter("Load Integer"); - static Counter* store_int_counter = GetCounter("Store Integer"); - static Counter* load_fp_counter = GetCounter("Load FP"); - static Counter* store_fp_counter = GetCounter("Store FP"); - - switch (instr->Mask(LoadStoreMask)) { - case STRB_w: - case STRH_w: - case STR_w: - VIXL_FALLTHROUGH(); - case STR_x: - store_int_counter->Increment(); - break; - case STR_s: - VIXL_FALLTHROUGH(); - case STR_d: - store_fp_counter->Increment(); - break; - case LDRB_w: - case LDRH_w: - case LDR_w: - case LDR_x: - case LDRSB_x: - case LDRSH_x: - case LDRSW_x: - case LDRSB_w: - VIXL_FALLTHROUGH(); - case LDRSH_w: - load_int_counter->Increment(); - break; - case LDR_s: - VIXL_FALLTHROUGH(); - case LDR_d: - load_fp_counter->Increment(); - break; - } -} - - -void Instrument::VisitLoadStoreUnscaledOffset(const Instruction* instr) { - Update(); - InstrumentLoadStore(instr); -} - - -void 
Instrument::VisitLoadStorePostIndex(const Instruction* instr) { - USE(instr); - Update(); - InstrumentLoadStore(instr); -} - - -void Instrument::VisitLoadStorePreIndex(const Instruction* instr) { - Update(); - InstrumentLoadStore(instr); -} - - -void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) { - Update(); - InstrumentLoadStore(instr); -} - -void Instrument::VisitLoadStoreRCpcUnscaledOffset(const Instruction* instr) { - Update(); - switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) { - case STLURB: - case STLURH: - case STLUR_w: - case STLUR_x: { - static Counter* counter = GetCounter("Store Integer"); - counter->Increment(); - break; - } - case LDAPURB: - case LDAPURSB_w: - case LDAPURSB_x: - case LDAPURH: - case LDAPURSH_w: - case LDAPURSH_x: - case LDAPUR_w: - case LDAPURSW: - case LDAPUR_x: { - static Counter* counter = GetCounter("Load Integer"); - counter->Increment(); - break; - } - } -} - - -void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) { - Update(); - InstrumentLoadStore(instr); -} - - -void Instrument::VisitLogicalShifted(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Logical DP"); - counter->Increment(); -} - - -void Instrument::VisitAddSubShifted(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Add/Sub DP"); - counter->Increment(); -} - - -void Instrument::VisitAddSubExtended(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Add/Sub DP"); - counter->Increment(); -} - - -void Instrument::VisitAddSubWithCarry(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Add/Sub DP"); - counter->Increment(); -} - - -void Instrument::VisitRotateRightIntoFlags(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitEvaluateIntoFlags(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitConditionalCompareRegister(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Conditional Compare"); - counter->Increment(); -} - - -void Instrument::VisitConditionalCompareImmediate(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Conditional Compare"); - counter->Increment(); -} - - -void Instrument::VisitConditionalSelect(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Conditional Select"); - counter->Increment(); -} - - -void Instrument::VisitDataProcessing1Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other Int DP"); - counter->Increment(); -} - - -void Instrument::VisitDataProcessing2Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other Int DP"); - counter->Increment(); -} - - -void Instrument::VisitDataProcessing3Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other Int DP"); - counter->Increment(); -} - - -void Instrument::VisitFPCompare(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPConditionalCompare(const Instruction* instr) { - USE(instr); - Update(); - static Counter* 
counter = GetCounter("Conditional Compare"); - counter->Increment(); -} - - -void Instrument::VisitFPConditionalSelect(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Conditional Select"); - counter->Increment(); -} - - -void Instrument::VisitFPImmediate(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPDataProcessing1Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPDataProcessing2Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPDataProcessing3Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPIntegerConvert(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPFixedPointConvert(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitCrypto2RegSHA(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Crypto"); - counter->Increment(); -} - - -void Instrument::VisitCrypto3RegSHA(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Crypto"); - counter->Increment(); -} - - -void Instrument::VisitCryptoAES(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Crypto"); - counter->Increment(); -} - - -void Instrument::VisitNEON2RegMisc(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEON2RegMiscFP16(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEON3Same(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEON3SameFP16(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEON3SameExtra(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEON3Different(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONAcrossLanes(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONByIndexedElement(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONCopy(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONExtract(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void 
Instrument::VisitNEONLoadStoreMultiStruct(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONLoadStoreMultiStructPostIndex( - const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONLoadStoreSingleStruct(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONLoadStoreSingleStructPostIndex( - const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONModifiedImmediate(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar2RegMisc(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar2RegMiscFP16(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar3Diff(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar3Same(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar3SameFP16(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar3SameExtra(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalarByIndexedElement(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalarCopy(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalarPairwise(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalarShiftImmediate(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONShiftImmediate(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONTable(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONPerm(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitReserved(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitUnallocated(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void 
Instrument::VisitUnimplemented(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other");
-  counter->Increment();
-}
-
-
-}  // namespace aarch64
-}  // namespace vixl
diff --git a/src/aarch64/instrument-aarch64.h b/src/aarch64/instrument-aarch64.h
deleted file mode 100644
index 4401b3ea..00000000
--- a/src/aarch64/instrument-aarch64.h
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2014, VIXL authors
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-// * Neither the name of ARM Limited nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef VIXL_AARCH64_INSTRUMENT_AARCH64_H_
-#define VIXL_AARCH64_INSTRUMENT_AARCH64_H_
-
-#include "../globals-vixl.h"
-#include "../utils-vixl.h"
-
-#include "constants-aarch64.h"
-#include "decoder-aarch64.h"
-#include "instrument-aarch64.h"
-
-namespace vixl {
-namespace aarch64 {
-
-const int kCounterNameMaxLength = 256;
-const uint64_t kDefaultInstrumentationSamplingPeriod = 1 << 22;
-
-
-enum InstrumentState { InstrumentStateDisable = 0, InstrumentStateEnable = 1 };
-
-
-enum CounterType {
-  Gauge = 0,      // Gauge counters reset themselves after reading.
-  Cumulative = 1  // Cumulative counters keep their value after reading.
-};
-
-
-class Counter {
- public:
-  explicit Counter(const char* name, CounterType type = Gauge);
-
-  void Increment();
-  void Enable();
-  void Disable();
-  bool IsEnabled();
-  uint64_t GetCount();
-  VIXL_DEPRECATED("GetCount", uint64_t count()) { return GetCount(); }
-
-  const char* GetName();
-  VIXL_DEPRECATED("GetName", const char* name()) { return GetName(); }
-
-  CounterType GetType();
-  VIXL_DEPRECATED("GetType", CounterType type()) { return GetType(); }
-
- private:
-  char name_[kCounterNameMaxLength];
-  uint64_t count_;
-  bool enabled_;
-  CounterType type_;
-};
-
-
-class Instrument : public DecoderVisitor {
- public:
-  explicit Instrument(
-      const char* datafile = NULL,
-      uint64_t sample_period = kDefaultInstrumentationSamplingPeriod);
-  ~Instrument();
-
-  void Enable();
-  void Disable();
-
-// Declare all Visitor functions.
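// (For each entry in VISITOR_LIST, the macro below expands to one override
// declaration; the "AddSubImmediate" entry, for example, becomes
//   void VisitAddSubImmediate(const Instruction* instr) VIXL_OVERRIDE;
// matching the Instrument::VisitAddSubImmediate definition deleted above.)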
-#define DECLARE(A) void Visit##A(const Instruction* instr) VIXL_OVERRIDE; - VISITOR_LIST(DECLARE) -#undef DECLARE - - private: - void Update(); - void DumpCounters(); - void DumpCounterNames(); - void DumpEventMarker(unsigned marker); - void HandleInstrumentationEvent(unsigned event); - Counter* GetCounter(const char* name); - - void InstrumentLoadStore(const Instruction* instr); - void InstrumentLoadStorePair(const Instruction* instr); - - std::list<Counter*> counters_; - - FILE* output_stream_; - - // Counter information is dumped every sample_period_ instructions decoded. - // For a sample_period_ = 0 a final counter value is only produced when the - // Instrumentation class is destroyed. - uint64_t sample_period_; -}; - -} // namespace aarch64 -} // namespace vixl - -#endif // VIXL_AARCH64_INSTRUMENT_AARCH64_H_ diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc index e7ede2f9..cab02573 100644 --- a/src/aarch64/logic-aarch64.cc +++ b/src/aarch64/logic-aarch64.cc @@ -184,14 +184,28 @@ void Simulator::ld1(VectorFormat vform, } -void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { +void Simulator::ld1r(VectorFormat vform, + VectorFormat unpack_vform, + LogicVRegister dst, + uint64_t addr, + bool is_signed) { + unsigned unpack_size = LaneSizeInBitsFromFormat(unpack_vform); dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - dst.ReadUintFromMem(vform, i, addr); + if (is_signed) { + dst.ReadIntFromMem(vform, unpack_size, i, addr); + } else { + dst.ReadUintFromMem(vform, unpack_size, i, addr); + } } } +void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { + ld1r(vform, vform, dst, addr); +} + + void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, @@ -550,6 +564,7 @@ LogicVRegister Simulator::add(VectorFormat vform, const LogicVRegister& src2) { int lane_size = LaneSizeInBitsFromFormat(vform); dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { // Test for unsigned saturation. uint64_t ua = src1.UintLeftJustified(vform, i); @@ -568,12 +583,39 @@ LogicVRegister Simulator::add(VectorFormat vform, if ((pos_a == pos_b) && (pos_a != pos_r)) { dst.SetSignedSat(i, pos_a); } - dst.SetInt(vform, i, ur >> (64 - lane_size)); } return dst; } +LogicVRegister Simulator::add_uint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + uint64_t value) { + int lane_size = LaneSizeInBitsFromFormat(vform); + VIXL_ASSERT(IsUintN(lane_size, value)); + dst.ClearForWrite(vform); + // Left-justify `value`. + uint64_t ub = value << (64 - lane_size); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for unsigned saturation. + uint64_t ua = src1.UintLeftJustified(vform, i); + uint64_t ur = ua + ub; + if (ur < ua) { + dst.SetUnsignedSat(i, true); + } + + // Test for signed saturation. + // `value` is always positive, so we have an overflow if the (signed) result + // is smaller than the first operand. 
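+    // (Worked example of the left-justified trick, assuming B-sized lanes: a
+    // lane holding 0x7f is left-justified to 0x7f00000000000000; adding
+    // value == 1, left-justified to 0x0100000000000000, yields
+    // 0x8000000000000000, which is negative as an int64_t while the first
+    // operand was positive, so the lane saturates. The unsigned check above
+    // is analogous: 0xff + 1 wraps the left-justified sum below the first
+    // operand.)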
+ if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) { + dst.SetSignedSat(i, true); + } + + dst.SetInt(vform, i, ur >> (64 - lane_size)); + } + return dst; +} LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, @@ -586,25 +628,68 @@ LogicVRegister Simulator::addp(VectorFormat vform, return dst; } +LogicVRegister Simulator::sdiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD)); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t val1 = src1.Int(vform, i); + int64_t val2 = src2.Int(vform, i); + int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt; + int64_t quotient = 0; + if ((val1 == min_int) && (val2 == -1)) { + quotient = min_int; + } else if (val2 != 0) { + quotient = val1 / val2; + } + dst.SetInt(vform, i, quotient); + } + + return dst; +} + +LogicVRegister Simulator::udiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD)); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t val1 = src1.Uint(vform, i); + uint64_t val2 = src2.Uint(vform, i); + uint64_t quotient = 0; + if (val2 != 0) { + quotient = val1 / val2; + } + dst.SetUint(vform, i, quotient); + } + + return dst; +} + LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; mul(vform, temp, src1, src2); - add(vform, dst, dst, temp); + add(vform, dst, srca, temp); return dst; } LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; mul(vform, temp, src1, src2); - sub(vform, dst, dst, temp); + sub(vform, dst, srca, temp); return dst; } @@ -614,6 +699,7 @@ LogicVRegister Simulator::mul(VectorFormat vform, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); } @@ -632,6 +718,70 @@ LogicVRegister Simulator::mul(VectorFormat vform, } +LogicVRegister Simulator::smulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t dst_val; + int64_t val1 = src1.Int(vform, i); + int64_t val2 = src2.Int(vform, i); + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: + dst_val = internal::MultiplyHigh<8>(val1, val2); + break; + case 16: + dst_val = internal::MultiplyHigh<16>(val1, val2); + break; + case 32: + dst_val = internal::MultiplyHigh<32>(val1, val2); + break; + case 64: + dst_val = internal::MultiplyHigh<64>(val1, val2); + break; + default: + dst_val = 0xbadbeef; + VIXL_UNREACHABLE(); + break; + } + dst.SetInt(vform, i, dst_val); + } + return dst; +} + + +LogicVRegister Simulator::umulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t dst_val; + uint64_t val1 = src1.Uint(vform, i); + uint64_t val2 = src2.Uint(vform, i); + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: + dst_val = internal::MultiplyHigh<8>(val1, val2); + break; + case 16: + dst_val = internal::MultiplyHigh<16>(val1, val2); + break; + case 32: + dst_val = 
internal::MultiplyHigh<32>(val1, val2);
+        break;
+      case 64:
+        dst_val = internal::MultiplyHigh<64>(val1, val2);
+        break;
+      default:
+        dst_val = 0xbadbeef;
+        VIXL_UNREACHABLE();
+        break;
+    }
+    dst.SetUint(vform, i, dst_val);
+  }
+  return dst;
+}
+
+
 LogicVRegister Simulator::mla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
@@ -639,7 +789,7 @@ LogicVRegister Simulator::mla(VectorFormat vform,
                               int index) {
   SimVRegister temp;
   VectorFormat indexform = VectorFormatFillQ(vform);
-  return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
+  return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
 }
 
@@ -650,7 +800,7 @@
                               int index) {
   SimVRegister temp;
   VectorFormat indexform = VectorFormatFillQ(vform);
-  return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
+  return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
 }
 
@@ -898,8 +1048,14 @@ LogicVRegister Simulator::sdot(VectorFormat vform,
                                const LogicVRegister& src2,
                                int index) {
   SimVRegister temp;
-  VectorFormat indexform = VectorFormatFillQ(vform);
-  return sdot(vform, dst, src1, dup_element(indexform, temp, src2, index));
+  // NEON indexed `dot` allows the index value to exceed the register size.
+  // Promote the format to a Q-sized vector format before the duplication.
+  dup_elements_to_segments(IsSVEFormat(vform) ? vform
+                                              : VectorFormatFillQ(vform),
+                           temp,
+                           src2,
+                           index);
+  return sdot(vform, dst, src1, temp);
 }
 
@@ -920,8 +1076,14 @@ LogicVRegister Simulator::udot(VectorFormat vform,
                                const LogicVRegister& src2,
                                int index) {
   SimVRegister temp;
-  VectorFormat indexform = VectorFormatFillQ(vform);
-  return udot(vform, dst, src1, dup_element(indexform, temp, src2, index));
+  // NEON indexed `dot` allows the index value to exceed the register size.
+  // Promote the format to a Q-sized vector format before the duplication.
+  dup_elements_to_segments(IsSVEFormat(vform) ? vform
+                                              : VectorFormatFillQ(vform),
+                           temp,
+                           src2,
+                           index);
+  return udot(vform, dst, src1, temp);
 }
 
@@ -1025,6 +1187,34 @@ LogicVRegister Simulator::sub(VectorFormat vform,
   return dst;
 }
 
+LogicVRegister Simulator::sub_uint(VectorFormat vform,
+                                   LogicVRegister dst,
+                                   const LogicVRegister& src1,
+                                   uint64_t value) {
+  int lane_size = LaneSizeInBitsFromFormat(vform);
+  VIXL_ASSERT(IsUintN(lane_size, value));
+  dst.ClearForWrite(vform);
+  // Left-justify `value`.
+  uint64_t ub = value << (64 - lane_size);
+  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+    // Test for unsigned saturation.
+    uint64_t ua = src1.UintLeftJustified(vform, i);
+    uint64_t ur = ua - ub;
+    if (ub > ua) {
+      dst.SetUnsignedSat(i, false);
+    }
+
+    // Test for signed saturation.
+    // `value` is always positive, so we have an overflow if the (signed) result
+    // is greater than the first operand.
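+    // (Mirror of the addition case, again assuming B-sized lanes: subtracting
+    // value == 1 from a lane holding 0x80 leaves 0x7f00000000000000, which is
+    // greater, as an int64_t, than the negative first operand, so the lane
+    // saturates toward the signed minimum; for a lane holding 0, ub > ua
+    // triggers the unsigned check above instead.)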
+ if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) { + dst.SetSignedSat(i, false); + } + + dst.SetInt(vform, i, ur >> (64 - lane_size)); + } + return dst; +} LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst, @@ -1091,12 +1281,12 @@ LogicVRegister Simulator::bic(VectorFormat vform, const LogicVRegister& src, uint64_t imm) { uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; ++i) { + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { result[i] = src.Uint(vform, i) & ~imm; } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -1298,10 +1488,13 @@ LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister Simulator::sminmaxv(VectorFormat vform, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, bool max) { int64_t dst_val = max ? INT64_MIN : INT64_MAX; for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + int64_t src_val = src.Int(vform, i); if (max) { dst_val = (src_val > dst_val) ? src_val : dst_val; @@ -1318,15 +1511,35 @@ LogicVRegister Simulator::sminmaxv(VectorFormat vform, LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - sminmaxv(vform, dst, src, true); + sminmaxv(vform, dst, GetPTrue(), src, true); + return dst; +} + + +LogicVRegister Simulator::sminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + sminmaxv(vform, dst, GetPTrue(), src, false); + return dst; +} + + +LogicVRegister Simulator::smaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + sminmaxv(vform, dst, pg, src, true); return dst; } LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src) { - sminmaxv(vform, dst, src, false); + VIXL_ASSERT(IsSVEFormat(vform)); + sminmaxv(vform, dst, pg, src, false); return dst; } @@ -1414,10 +1627,13 @@ LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister Simulator::uminmaxv(VectorFormat vform, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, bool max) { uint64_t dst_val = max ? 0 : UINT64_MAX; for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + uint64_t src_val = src.Uint(vform, i); if (max) { dst_val = (src_val > dst_val) ? 
src_val : dst_val; @@ -1434,7 +1650,7 @@ LogicVRegister Simulator::uminmaxv(VectorFormat vform, LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uminmaxv(vform, dst, src, true); + uminmaxv(vform, dst, GetPTrue(), src, true); return dst; } @@ -1442,7 +1658,27 @@ LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uminmaxv(vform, dst, src, false); + uminmaxv(vform, dst, GetPTrue(), src, false); + return dst; +} + + +LogicVRegister Simulator::umaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uminmaxv(vform, dst, pg, src, true); + return dst; +} + + +LogicVRegister Simulator::uminv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uminmaxv(vform, dst, pg, src, false); return dst; } @@ -1521,14 +1757,104 @@ LogicVRegister Simulator::ushll2(VectorFormat vform, return ushl(vform, dst, extendedreg, shiftreg); } +std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform, + const LogicPRegister& pg, + const LogicVRegister& src, + int offset_from_last_active) { + // Untested for any other values. + VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1)); + + int last_active = GetLastActive(vform, pg); + int lane_count = LaneCountFromFormat(vform); + int index = + ((last_active + offset_from_last_active) + lane_count) % lane_count; + return std::make_pair(last_active >= 0, src.Uint(vform, index)); +} + +LogicVRegister Simulator::compact(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + int j = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (pg.IsActive(vform, i)) { + dst.SetUint(vform, j++, src.Uint(vform, i)); + } + } + for (; j < LaneCountFromFormat(vform); j++) { + dst.SetUint(vform, j, 0); + } + return dst; +} + +LogicVRegister Simulator::splice(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2) { + int lane_count = LaneCountFromFormat(vform); + int first_active = GetFirstActive(vform, pg); + int last_active = GetLastActive(vform, pg); + int dst_idx = 0; + uint64_t result[kZRegMaxSizeInBytes]; + + if (first_active >= 0) { + VIXL_ASSERT(last_active >= first_active); + VIXL_ASSERT(last_active < lane_count); + for (int i = first_active; i <= last_active; i++) { + result[dst_idx++] = src1.Uint(vform, i); + } + } + + VIXL_ASSERT(dst_idx <= lane_count); + for (int i = dst_idx; i < lane_count; i++) { + result[i] = src2.Uint(vform, i - dst_idx); + } + + for (int i = 0; i < lane_count; i++) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + +LogicVRegister Simulator::sel(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2) { + int p_reg_bits_per_lane = + LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit; + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane) + ? 
src1.Uint(vform, lane) + : src2.Uint(vform, lane); + dst.SetUint(vform, lane, lane_value); + } + return dst; +} + + +LogicPRegister Simulator::sel(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src1, + const LogicPRegister& src2) { + for (int i = 0; i < dst.GetChunkCount(); i++) { + LogicPRegister::ChunkType mask = pg.GetChunk(i); + LogicPRegister::ChunkType result = + (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i)); + dst.SetChunk(i, result); + } + return dst; +} + LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { dst.ClearForWrite(vform); - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; i++) { + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; i++) { uint64_t src_lane = src.Uint(vform, i); uint64_t dst_lane = dst.Uint(vform, i); uint64_t shifted = src_lane << shift; @@ -1577,10 +1903,10 @@ LogicVRegister Simulator::sri(VectorFormat vform, const LogicVRegister& src, int shift) { dst.ClearForWrite(vform); - int laneCount = LaneCountFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); VIXL_ASSERT((shift > 0) && (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); - for (int i = 0; i < laneCount; i++) { + for (int i = 0; i < lane_count; i++) { uint64_t src_lane = src.Uint(vform, i); uint64_t dst_lane = dst.Uint(vform, i); uint64_t shifted; @@ -1663,15 +1989,18 @@ LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uint64_t result[16]; - int laneSizeInBits = LaneSizeInBitsFromFormat(vform); - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; i++) { - result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); + + // Ensure that we can store one result per lane. + int result[kZRegMaxSizeInBytes]; + + for (int i = 0; i < lane_count; i++) { + result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -1681,38 +2010,51 @@ LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uint64_t result[16]; - int laneSizeInBits = LaneSizeInBitsFromFormat(vform); - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; i++) { - result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); + + // Ensure that we can store one result per lane. + int result[kZRegMaxSizeInBytes]; + + for (int i = 0; i < lane_count; i++) { + result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } +LogicVRegister Simulator::cnot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = (src.Uint(vform, i) == 0) ? 
1 : 0; + dst.SetUint(vform, i, value); + } + return dst; +} + + LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uint64_t result[16]; - int laneSizeInBits = LaneSizeInBitsFromFormat(vform); - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; i++) { - uint64_t value = src.Uint(vform, i); - result[i] = 0; - for (int j = 0; j < laneSizeInBits; j++) { - result[i] += (value & 1); - value >>= 1; - } + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); + + // Ensure that we can store one result per lane. + int result[kZRegMaxSizeInBytes]; + + for (int i = 0; i < lane_count; i++) { + result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -1896,11 +2238,108 @@ LogicVRegister Simulator::abs(VectorFormat vform, } +LogicVRegister Simulator::andv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform)); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result &= src.Uint(vform, i); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + dst.ClearForWrite(vform_dst); + dst.SetUint(vform_dst, 0, result); + return dst; +} + + +LogicVRegister Simulator::eorv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uint64_t result = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result ^= src.Uint(vform, i); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + dst.ClearForWrite(vform_dst); + dst.SetUint(vform_dst, 0, result); + return dst; +} + + +LogicVRegister Simulator::orv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uint64_t result = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result |= src.Uint(vform, i); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + dst.ClearForWrite(vform_dst); + dst.SetUint(vform_dst, 0, result); + return dst; +} + + +LogicVRegister Simulator::saddv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize); + int64_t result = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + // The destination register always has D-lane sizes and the source register + // always has S-lanes or smaller, so signed integer overflow -- undefined + // behaviour -- can't occur. 
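+    // (Concretely: even the widest case, kZRegMaxSize == 2048 bits of S-sized
+    // lanes, sums at most 64 lanes of magnitude 2^31, so |result| stays below
+    // 2^37 and fits the int64_t accumulator easily.)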
+    result += src.Int(vform, i);
+  }
+
+  dst.ClearForWrite(kFormatD);
+  dst.SetInt(kFormatD, 0, result);
+  return dst;
+}
+
+
+LogicVRegister Simulator::uaddv(VectorFormat vform,
+                                LogicVRegister dst,
+                                const LogicPRegister& pg,
+                                const LogicVRegister& src) {
+  VIXL_ASSERT(IsSVEFormat(vform));
+  uint64_t result = 0;
+  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+    if (!pg.IsActive(vform, i)) continue;
+
+    result += src.Uint(vform, i);
+  }
+
+  dst.ClearForWrite(kFormatD);
+  dst.SetUint(kFormatD, 0, result);
+  return dst;
+}
+
+
 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
                                         LogicVRegister dst,
-                                        bool dstIsSigned,
+                                        bool dst_is_signed,
                                         const LogicVRegister& src,
-                                        bool srcIsSigned) {
+                                        bool src_is_signed) {
   bool upperhalf = false;
   VectorFormat srcform = kFormatUndefined;
   int64_t ssrc[8];
@@ -1969,7 +2408,7 @@
     }
 
     // Test for unsigned saturation
-    if (srcIsSigned) {
+    if (src_is_signed) {
       if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
         dst.SetUnsignedSat(offset + i, true);
       } else if (ssrc[i] < 0) {
@@ -1982,13 +2421,13 @@
     }
 
     int64_t result;
-    if (srcIsSigned) {
+    if (src_is_signed) {
      result = ssrc[i] & MaxUintFromFormat(dstform);
    } else {
      result = usrc[i] & MaxUintFromFormat(dstform);
    }
 
-    if (dstIsSigned) {
+    if (dst_is_signed) {
      dst.SetInt(dstform, offset + i, result);
    } else {
      dst.SetUint(dstform, offset + i, result);
@@ -2030,17 +2469,17 @@ LogicVRegister Simulator::absdiff(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
-                                  bool issigned) {
+                                  bool is_signed) {
   dst.ClearForWrite(vform);
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    if (issigned) {
-      int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
-      sr = sr > 0 ? sr : -sr;
-      dst.SetInt(vform, i, sr);
+    bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
+                                  : (src1.Uint(vform, i) > src2.Uint(vform, i));
+    // Always calculate the answer using unsigned arithmetic, to avoid
+    // implementation-defined signed overflow.
+    if (src1_gt_src2) {
+      dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
    } else {
-      int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
-      sr = sr > 0 ?
sr : -sr; - dst.SetUint(vform, i, sr); + dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i)); } } return dst; @@ -2085,15 +2524,15 @@ LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - int laneSizeInBits = LaneSizeInBitsFromFormat(vform); + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); uint64_t reversed_value; uint64_t value; - for (int i = 0; i < laneCount; i++) { + for (int i = 0; i < lane_count; i++) { value = src.Uint(vform, i); reversed_value = 0; - for (int j = 0; j < laneSizeInBits; j++) { + for (int j = 0; j < lane_size_in_bits; j++) { reversed_value = (reversed_value << 1) | (value & 1); value >>= 1; } @@ -2101,7 +2540,7 @@ LogicVRegister Simulator::rbit(VectorFormat vform, } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -2110,19 +2549,33 @@ LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src, - int revSize) { - uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - int laneSize = LaneSizeInBytesFromFormat(vform); - int lanesPerLoop = revSize / laneSize; - for (int i = 0; i < laneCount; i += lanesPerLoop) { - for (int j = 0; j < lanesPerLoop; j++) { - result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count / 2; i++) { + uint64_t t = src.Uint(vform, i); + dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1)); + dst.SetUint(vform, lane_count - i - 1, t); + } + return dst; +} + + +LogicVRegister Simulator::rev_byte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int rev_size) { + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int lane_size = LaneSizeInBytesFromFormat(vform); + int lanes_per_loop = rev_size / lane_size; + for (int i = 0; i < lane_count; i += lanes_per_loop) { + for (int j = 0; j < lanes_per_loop; j++) { + result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j); } } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -2132,21 +2585,21 @@ LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - return rev(vform, dst, src, 2); + return rev_byte(vform, dst, src, 2); } LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - return rev(vform, dst, src, 4); + return rev_byte(vform, dst, src, 4); } LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - return rev(vform, dst, src, 8); + return rev_byte(vform, dst, src, 8); } @@ -2215,22 +2668,60 @@ LogicVRegister Simulator::ext(VectorFormat vform, const LogicVRegister& src1, const LogicVRegister& src2, int index) { - uint8_t result[16]; - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount - index; ++i) { + uint8_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + for 
(int i = 0; i < lane_count - index; ++i) { result[i] = src1.Uint(vform, i + index); } for (int i = 0; i < index; ++i) { - result[laneCount - index + i] = src2.Uint(vform, i); + result[lane_count - index + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } template <typename T> +LogicVRegister Simulator::fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src) { + T result = acc.Float<T>(0); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result = FPAdd(result, src.Float<T>(i)); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + acc.ClearForWrite(vform_dst); + acc.SetFloat(0, result); + return acc; +} + +LogicVRegister Simulator::fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src) { + switch (LaneSizeInBitsFromFormat(vform)) { + case kHRegSize: + fadda<SimFloat16>(vform, acc, pg, src); + break; + case kSRegSize: + fadda<float>(vform, acc, pg, src); + break; + case kDRegSize: + fadda<double>(vform, acc, pg, src); + break; + default: + VIXL_UNREACHABLE(); + } + return acc; +} + +template <typename T> LogicVRegister Simulator::fcadd(VectorFormat vform, LogicVRegister dst, // d const LogicVRegister& src1, // n @@ -2273,7 +2764,7 @@ LogicVRegister Simulator::fcadd(VectorFormat vform, const LogicVRegister& src2, // m int rot) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - VIXL_UNIMPLEMENTED(); + fcadd<SimFloat16>(vform, dst, src1, src2, rot); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { fcadd<float>(vform, dst, src1, src2, rot); } else { @@ -2283,12 +2774,12 @@ LogicVRegister Simulator::fcadd(VectorFormat vform, return dst; } - template <typename T> LogicVRegister Simulator::fcmla(VectorFormat vform, - LogicVRegister dst, // d - const LogicVRegister& src1, // n - const LogicVRegister& src2, // m + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + const LogicVRegister& acc, int index, int rot) { int elements = LaneCountFromFormat(vform); @@ -2301,83 +2792,33 @@ LogicVRegister Simulator::fcmla(VectorFormat vform, // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i) for (int e = 0; e <= (elements / 2) - 1; e++) { - switch (rot) { - case 0: - element1 = src2.Float<T>(index * 2); - element2 = src1.Float<T>(e * 2); - element3 = src2.Float<T>(index * 2 + 1); - element4 = src1.Float<T>(e * 2); - break; - case 90: - element1 = FPNeg(src2.Float<T>(index * 2 + 1)); - element2 = src1.Float<T>(e * 2 + 1); - element3 = src2.Float<T>(index * 2); - element4 = src1.Float<T>(e * 2 + 1); - break; - case 180: - element1 = FPNeg(src2.Float<T>(index * 2)); - element2 = src1.Float<T>(e * 2); - element3 = FPNeg(src2.Float<T>(index * 2 + 1)); - element4 = src1.Float<T>(e * 2); - break; - case 270: - element1 = src2.Float<T>(index * 2 + 1); - element2 = src1.Float<T>(e * 2 + 1); - element3 = FPNeg(src2.Float<T>(index * 2)); - element4 = src1.Float<T>(e * 2 + 1); - break; - default: - VIXL_UNREACHABLE(); - return dst; // prevents "element(n) may be unintialized" errors - } - dst.ClearForWrite(vform); - dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1)); - dst.SetFloat<T>(e * 2 + 1, - FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3)); - } - return dst; -} - - -template <typename T> -LogicVRegister 
Simulator::fcmla(VectorFormat vform, - LogicVRegister dst, // d - const LogicVRegister& src1, // n - const LogicVRegister& src2, // m - int rot) { - int elements = LaneCountFromFormat(vform); - - T element1, element2, element3, element4; - rot *= 90; - - // Loop example: - // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i) - // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i) + // Index == -1 indicates a vector/vector rather than vector/indexed-element + // operation. + int f = (index < 0) ? e : index; - for (int e = 0; e <= (elements / 2) - 1; e++) { switch (rot) { case 0: - element1 = src2.Float<T>(e * 2); + element1 = src2.Float<T>(f * 2); element2 = src1.Float<T>(e * 2); - element3 = src2.Float<T>(e * 2 + 1); + element3 = src2.Float<T>(f * 2 + 1); element4 = src1.Float<T>(e * 2); break; case 90: - element1 = FPNeg(src2.Float<T>(e * 2 + 1)); + element1 = FPNeg(src2.Float<T>(f * 2 + 1)); element2 = src1.Float<T>(e * 2 + 1); - element3 = src2.Float<T>(e * 2); + element3 = src2.Float<T>(f * 2); element4 = src1.Float<T>(e * 2 + 1); break; case 180: - element1 = FPNeg(src2.Float<T>(e * 2)); + element1 = FPNeg(src2.Float<T>(f * 2)); element2 = src1.Float<T>(e * 2); - element3 = FPNeg(src2.Float<T>(e * 2 + 1)); + element3 = FPNeg(src2.Float<T>(f * 2 + 1)); element4 = src1.Float<T>(e * 2); break; case 270: - element1 = src2.Float<T>(e * 2 + 1); + element1 = src2.Float<T>(f * 2 + 1); element2 = src1.Float<T>(e * 2 + 1); - element3 = FPNeg(src2.Float<T>(e * 2)); + element3 = FPNeg(src2.Float<T>(f * 2)); element4 = src1.Float<T>(e * 2 + 1); break; default: @@ -2385,25 +2826,28 @@ LogicVRegister Simulator::fcmla(VectorFormat vform, return dst; // prevents "element(n) may be unintialized" errors } dst.ClearForWrite(vform); - dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1)); - dst.SetFloat<T>(e * 2 + 1, - FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3)); + dst.SetFloat<T>(vform, + e * 2, + FPMulAdd(acc.Float<T>(e * 2), element2, element1)); + dst.SetFloat<T>(vform, + e * 2 + 1, + FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3)); } return dst; } - LogicVRegister Simulator::fcmla(VectorFormat vform, - LogicVRegister dst, // d - const LogicVRegister& src1, // n - const LogicVRegister& src2, // m + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + const LogicVRegister& acc, int rot) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - VIXL_UNIMPLEMENTED(); + fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fcmla<float>(vform, dst, src1, src2, rot); + fcmla<float>(vform, dst, src1, src2, acc, -1, rot); } else { - fcmla<double>(vform, dst, src1, src2, rot); + fcmla<double>(vform, dst, src1, src2, acc, -1, rot); } return dst; } @@ -2418,9 +2862,9 @@ LogicVRegister Simulator::fcmla(VectorFormat vform, if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { VIXL_UNIMPLEMENTED(); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fcmla<float>(vform, dst, src1, src2, index, rot); + fcmla<float>(vform, dst, src1, src2, dst, index, rot); } else { - fcmla<double>(vform, dst, src1, src2, index, rot); + fcmla<double>(vform, dst, src1, src2, dst, index, rot); } return dst; } @@ -2430,23 +2874,59 @@ LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int src_index) { - int laneCount = LaneCountFromFormat(vform); - uint64_t value = src.Uint(vform, src_index); - 
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
- dst.SetUint(vform, i, value);
+ if (vform == kFormatVnQ) {
+ // When duplicating a 128-bit value, split it into two 64-bit parts, then
+ // copy both parts to their slots in the destination register.
+ uint64_t low = src.Uint(kFormatVnD, src_index * 2);
+ uint64_t high = src.Uint(kFormatVnD, (src_index * 2) + 1);
+ dst.ClearForWrite(vform);
+ for (int d_lane = 0; d_lane < LaneCountFromFormat(kFormatVnD);
+ d_lane += 2) {
+ dst.SetUint(kFormatVnD, d_lane, low);
+ dst.SetUint(kFormatVnD, d_lane + 1, high);
+ }
+ } else {
+ int lane_count = LaneCountFromFormat(vform);
+ uint64_t value = src.Uint(vform, src_index);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < lane_count; ++i) {
+ dst.SetUint(vform, i, value);
+ }
}
return dst;
}
+LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int src_index) {
+ // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
+ // whereas in NEON, the size of a segment equals the size of the register
+ // itself.
+ int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
+ VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
+ int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
+
+ VIXL_ASSERT(src_index >= 0);
+ VIXL_ASSERT(src_index < lanes_per_segment);
+
+ dst.ClearForWrite(vform);
+ for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
+ uint64_t value = src.Uint(vform, j + src_index);
+ for (int i = 0; i < lanes_per_segment; i++) {
+ dst.SetUint(vform, j + i, value);
+ }
+ }
+ return dst;
+}
LogicVRegister Simulator::dup_immediate(VectorFormat vform,
LogicVRegister dst,
uint64_t imm) {
- int laneCount = LaneCountFromFormat(vform);
+ int lane_count = LaneCountFromFormat(vform);
uint64_t value = imm & MaxUintFromFormat(vform);
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, value);
}
return dst;
@@ -2473,12 +2953,93 @@ LogicVRegister Simulator::ins_immediate(VectorFormat vform,
}
+LogicVRegister Simulator::index(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t start,
+ uint64_t step) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ uint64_t value = start;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, value);
+ value += step;
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::insr(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
+ dst.SetUint(vform, i, dst.Uint(vform, i - 1));
+ }
+ dst.SetUint(vform, 0, imm);
+ return dst;
+}
+
+
+LogicVRegister Simulator::mov(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
+ dst.SetUint(vform, lane, src.Uint(vform, lane));
+ }
+ return dst;
+}
+
+
+LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
+ // Avoid a copy if the registers already alias.
+ if (dst.Aliases(src)) return dst; + + for (int i = 0; i < dst.GetChunkCount(); i++) { + dst.SetChunk(i, src.GetChunk(i)); + } + return dst; +} + + +LogicVRegister Simulator::mov_merging(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src) { + return sel(vform, dst, pg, src, dst); +} + + +LogicVRegister Simulator::mov_zeroing(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src) { + SimVRegister zero; + dup_immediate(vform, zero, 0); + return sel(vform, dst, pg, src, zero); +} + + +LogicPRegister Simulator::mov_merging(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + return sel(dst, pg, src, dst); +} + + +LogicPRegister Simulator::mov_zeroing(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + SimPRegister all_false; + return sel(dst, pg, src, pfalse(all_false)); +} + + LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst, uint64_t imm) { - int laneCount = LaneCountFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, imm); } return dst; @@ -2488,9 +3049,9 @@ LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm) { - int laneCount = LaneCountFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, ~imm); } return dst; @@ -2502,12 +3063,12 @@ LogicVRegister Simulator::orr(VectorFormat vform, const LogicVRegister& src, uint64_t imm) { uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; ++i) { + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { result[i] = src.Uint(vform, i) | imm; } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -2568,6 +3129,37 @@ LogicVRegister Simulator::sxtl2(VectorFormat vform, } +LogicVRegister Simulator::uxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits) { + int lane_count = LaneCountFromFormat(vform); + uint64_t mask = GetUintMask(from_size_in_bits); + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; i++) { + dst.SetInt(vform, i, src.Uint(vform, i) & mask); + } + return dst; +} + + +LogicVRegister Simulator::sxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits) { + int lane_count = LaneCountFromFormat(vform); + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; i++) { + uint64_t value = + ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i)); + dst.SetInt(vform, i, value); + } + return dst; +} + + LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, @@ -2615,6 +3207,22 @@ LogicVRegister Simulator::rshrn2(VectorFormat vform, return extractnarrow(vformdst, dst, false, shifted_src, false); } +LogicVRegister Simulator::Table(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& ind) { + VIXL_ASSERT(IsSVEFormat(vform)); + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; i++) { + uint64_t index = 
ind.Uint(vform, i); + uint64_t value = (index >= static_cast<uint64_t>(lane_count)) + ? 0 + : tab.Uint(vform, static_cast<int>(index)); + dst.SetUint(vform, i, value); + } + return dst; +} + LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst, @@ -3182,7 +3790,7 @@ LogicVRegister Simulator::umlsl(VectorFormat vform, SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); - mls(vform, dst, temp1, temp2); + mls(vform, dst, dst, temp1, temp2); return dst; } @@ -3194,7 +3802,7 @@ LogicVRegister Simulator::umlsl2(VectorFormat vform, SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); - mls(vform, dst, temp1, temp2); + mls(vform, dst, dst, temp1, temp2); return dst; } @@ -3206,7 +3814,7 @@ LogicVRegister Simulator::smlsl(VectorFormat vform, SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); - mls(vform, dst, temp1, temp2); + mls(vform, dst, dst, temp1, temp2); return dst; } @@ -3218,7 +3826,7 @@ LogicVRegister Simulator::smlsl2(VectorFormat vform, SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); - mls(vform, dst, temp1, temp2); + mls(vform, dst, dst, temp1, temp2); return dst; } @@ -3230,7 +3838,7 @@ LogicVRegister Simulator::umlal(VectorFormat vform, SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); - mla(vform, dst, temp1, temp2); + mla(vform, dst, dst, temp1, temp2); return dst; } @@ -3242,7 +3850,7 @@ LogicVRegister Simulator::umlal2(VectorFormat vform, SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); - mla(vform, dst, temp1, temp2); + mla(vform, dst, dst, temp1, temp2); return dst; } @@ -3254,7 +3862,7 @@ LogicVRegister Simulator::smlal(VectorFormat vform, SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); - mla(vform, dst, temp1, temp2); + mla(vform, dst, dst, temp1, temp2); return dst; } @@ -3266,7 +3874,7 @@ LogicVRegister Simulator::smlal2(VectorFormat vform, SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); - mla(vform, dst, temp1, temp2); + mla(vform, dst, dst, temp1, temp2); return dst; } @@ -3371,7 +3979,7 @@ LogicVRegister Simulator::dot(VectorFormat vform, dst.ClearForWrite(vform); for (int e = 0; e < LaneCountFromFormat(vform); e++) { - int64_t result = 0; + uint64_t result = 0; int64_t element1, element2; for (int i = 0; i < 4; i++) { int index = 4 * e + i; @@ -3384,9 +3992,7 @@ LogicVRegister Simulator::dot(VectorFormat vform, } result += element1 * element2; } - - result += dst.Int(vform, e); - dst.SetInt(vform, e, result); + dst.SetUint(vform, e, result + dst.Uint(vform, e)); } return dst; } @@ -3564,16 +4170,16 @@ LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - int pairs = laneCount / 2; + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, 2 * i); result[(2 * i) + 1] = src2.Uint(vform, 2 * i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -3584,16 +4190,16 @@ LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[16]; - int laneCount = 
LaneCountFromFormat(vform); - int pairs = laneCount / 2; + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, (2 * i) + 1); result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -3604,16 +4210,16 @@ LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - int pairs = laneCount / 2; + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, i); result[(2 * i) + 1] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -3624,16 +4230,16 @@ LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - int pairs = laneCount / 2; + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, pairs + i); result[(2 * i) + 1] = src2.Uint(vform, pairs + i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -3644,15 +4250,15 @@ LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[32]; - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; ++i) { + uint64_t result[kZRegMaxSizeInBytes * 2]; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { result[i] = src1.Uint(vform, i); - result[laneCount + i] = src2.Uint(vform, i); + result[lane_count + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[2 * i]); } return dst; @@ -3663,15 +4269,15 @@ LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[32]; - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; ++i) { + uint64_t result[kZRegMaxSizeInBytes * 2]; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { result[i] = src1.Uint(vform, i); - result[laneCount + i] = src2.Uint(vform, i); + result[lane_count + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[(2 * i) + 1]); } return dst; @@ -4201,7 +4807,7 @@ uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { } else { \ result = OP(op1, op2); \ } \ - dst.SetFloat(i, result); \ + dst.SetFloat(vform, i, result); \ } \ return dst; \ } \ @@ -4244,7 +4850,7 @@ LogicVRegister Simulator::frecps(VectorFormat vform, T op1 = -src1.Float<T>(i); T op2 = src2.Float<T>(i); T result = FPProcessNaNs(op1, op2); - dst.SetFloat(i, IsNaN(result) ? 
result : FPRecipStepFused(op1, op2)); + dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2)); } return dst; } @@ -4276,7 +4882,7 @@ LogicVRegister Simulator::frsqrts(VectorFormat vform, T op1 = -src1.Float<T>(i); T op2 = src2.Float<T>(i); T result = FPProcessNaNs(op1, op2); - dst.SetFloat(i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2)); + dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2)); } return dst; } @@ -4309,29 +4915,41 @@ LogicVRegister Simulator::fcmp(VectorFormat vform, bool result = false; T op1 = src1.Float<T>(i); T op2 = src2.Float<T>(i); - T nan_result = FPProcessNaNs(op1, op2); - if (!IsNaN(nan_result)) { - switch (cond) { - case eq: - result = (op1 == op2); - break; - case ge: - result = (op1 >= op2); - break; - case gt: - result = (op1 > op2); - break; - case le: - result = (op1 <= op2); - break; - case lt: - result = (op1 < op2); - break; - default: - VIXL_UNREACHABLE(); - break; - } + bool unordered = IsNaN(FPProcessNaNs(op1, op2)); + + switch (cond) { + case eq: + result = (op1 == op2); + break; + case ge: + result = (op1 >= op2); + break; + case gt: + result = (op1 > op2); + break; + case le: + result = (op1 <= op2); + break; + case lt: + result = (op1 < op2); + break; + case ne: + result = (op1 != op2); + break; + case uo: + result = unordered; + break; + default: + // Other conditions are defined in terms of those above. + VIXL_UNREACHABLE(); + break; + } + + if (result && unordered) { + // Only `uo` and `ne` can be true for unordered comparisons. + VIXL_ASSERT((cond == uo) || (cond == ne)); } + dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); } return dst; @@ -4403,15 +5021,16 @@ LogicVRegister Simulator::fabscmp(VectorFormat vform, template <typename T> LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op1 = src1.Float<T>(i); T op2 = src2.Float<T>(i); - T acc = dst.Float<T>(i); + T acc = srca.Float<T>(i); T result = FPMulAdd(acc, op1, op2); - dst.SetFloat(i, result); + dst.SetFloat(vform, i, result); } return dst; } @@ -4419,15 +5038,16 @@ LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - fmla<SimFloat16>(vform, dst, src1, src2); + fmla<SimFloat16>(vform, dst, srca, src1, src2); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fmla<float>(vform, dst, src1, src2); + fmla<float>(vform, dst, srca, src1, src2); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - fmla<double>(vform, dst, src1, src2); + fmla<double>(vform, dst, srca, src1, src2); } return dst; } @@ -4436,13 +5056,14 @@ LogicVRegister Simulator::fmla(VectorFormat vform, template <typename T> LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op1 = -src1.Float<T>(i); T op2 = src2.Float<T>(i); - T acc = dst.Float<T>(i); + T acc = srca.Float<T>(i); T result = FPMulAdd(acc, op1, op2); dst.SetFloat(i, result); } @@ -4452,15 +5073,16 @@ LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister Simulator::fmls(VectorFormat 
vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - fmls<SimFloat16>(vform, dst, src1, src2); + fmls<SimFloat16>(vform, dst, srca, src1, src2); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fmls<float>(vform, dst, src1, src2); + fmls<float>(vform, dst, srca, src1, src2); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - fmls<double>(vform, dst, src1, src2); + fmls<double>(vform, dst, srca, src1, src2); } return dst; } @@ -4740,75 +5362,131 @@ NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) #undef DEFINE_NEON_FP_PAIR_OP template <typename T> -LogicVRegister Simulator::fminmaxv(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - typename TFPMinMaxOp<T>::type Op) { - VIXL_ASSERT((vform == kFormat4H) || (vform == kFormat8H) || - (vform == kFormat4S)); - USE(vform); - T result1 = (this->*Op)(src.Float<T>(0), src.Float<T>(1)); - T result2 = (this->*Op)(src.Float<T>(2), src.Float<T>(3)); - if (vform == kFormat8H) { - T result3 = (this->*Op)(src.Float<T>(4), src.Float<T>(5)); - T result4 = (this->*Op)(src.Float<T>(6), src.Float<T>(7)); - result1 = (this->*Op)(result1, result3); - result2 = (this->*Op)(result2, result4); - } - T result = (this->*Op)(result1, result2); +LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp<T>::type fn, + uint64_t inactive_value) { + int lane_count = LaneCountFromFormat(vform); + T result[kZRegMaxSizeInBytes / sizeof(T)]; + // Copy the source vector into a working array. Initialise the unused elements + // at the end of the array to the same value that a false predicate would set. + for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) { + result[i] = (i < lane_count) + ? src.Float<T>(i) + : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value); + } + + // Pairwise reduce the elements to a single value, using the pair op function + // argument. 
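+ // For example, with lane_count == 4 the reduction runs as:
+ //   step == 1: result[0] = fn(result[0], result[1]);
+ //              result[2] = fn(result[2], result[3]);
+ //   step == 2: result[0] = fn(result[0], result[2]);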
+ for (int step = 1; step < lane_count; step *= 2) { + for (int i = 0; i < lane_count; i += step * 2) { + result[i] = (this->*fn)(result[i], result[i + step]); + } + } dst.ClearForWrite(ScalarFormatFromFormat(vform)); - dst.SetFloat<T>(0, result); - return dst; + dst.SetFloat<T>(0, result[0]); + return dst; +} + +LogicVRegister Simulator::FPPairedAcrossHelper( + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp<SimFloat16>::type fn16, + typename TFPPairOp<float>::type fn32, + typename TFPPairOp<double>::type fn64, + uint64_t inactive_value) { + switch (LaneSizeInBitsFromFormat(vform)) { + case kHRegSize: + return FPPairedAcrossHelper<SimFloat16>(vform, + dst, + src, + fn16, + inactive_value); + case kSRegSize: + return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value); + default: + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + return FPPairedAcrossHelper<double>(vform, + dst, + src, + fn64, + inactive_value); + } } +LogicVRegister Simulator::faddv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPAdd<SimFloat16>, + &Simulator::FPAdd<float>, + &Simulator::FPAdd<double>, + 0); +} LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMax<SimFloat16>); - } else { - return fminmaxv<float>(vform, dst, src, &Simulator::FPMax<float>); - } + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = + FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMax<SimFloat16>, + &Simulator::FPMax<float>, + &Simulator::FPMax<double>, + inactive_value); } LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMin<SimFloat16>); - } else { - return fminmaxv<float>(vform, dst, src, &Simulator::FPMin<float>); - } + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = + FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMin<SimFloat16>, + &Simulator::FPMin<float>, + &Simulator::FPMin<double>, + inactive_value); } LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - return fminmaxv<SimFloat16>(vform, - dst, - src, - &Simulator::FPMaxNM<SimFloat16>); - } else { - return fminmaxv<float>(vform, dst, src, &Simulator::FPMaxNM<float>); - } + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMaxNM<SimFloat16>, + &Simulator::FPMaxNM<float>, + &Simulator::FPMaxNM<double>, + inactive_value); } LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - return fminmaxv<SimFloat16>(vform, - dst, - src, - &Simulator::FPMinNM<SimFloat16>); - } else { - return fminmaxv<float>(vform, dst, src, &Simulator::FPMinNM<float>); - } + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = 
FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMinNM<SimFloat16>, + &Simulator::FPMinNM<float>, + &Simulator::FPMinNM<double>, + inactive_value); } @@ -4843,14 +5521,14 @@ LogicVRegister Simulator::fmla(VectorFormat vform, SimVRegister temp; if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); - fmla<SimFloat16>(vform, dst, src1, index_reg); + fmla<SimFloat16>(vform, dst, dst, src1, index_reg); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); - fmla<float>(vform, dst, src1, index_reg); + fmla<float>(vform, dst, dst, src1, index_reg); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); - fmla<double>(vform, dst, src1, index_reg); + fmla<double>(vform, dst, dst, src1, index_reg); } return dst; } @@ -4865,14 +5543,14 @@ LogicVRegister Simulator::fmls(VectorFormat vform, SimVRegister temp; if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); - fmls<SimFloat16>(vform, dst, src1, index_reg); + fmls<SimFloat16>(vform, dst, dst, src1, index_reg); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); - fmls<float>(vform, dst, src1, index_reg); + fmls<float>(vform, dst, dst, src1, index_reg); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); - fmls<double>(vform, dst, src1, index_reg); + fmls<double>(vform, dst, dst, src1, index_reg); } return dst; } @@ -4941,62 +5619,142 @@ LogicVRegister Simulator::frint(VectorFormat vform, return dst; } +LogicVRegister Simulator::fcvt(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + double dst_value = + RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits); + + uint64_t dst_raw_bits = + FPToRawbitsWithSize(dst_data_size_in_bits, dst_value); + + dst.SetUint(vform, i, dst_raw_bits); + } + + return dst; +} LogicVRegister Simulator::fcvts(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, - FPRounding rounding_mode, + FPRounding round, int fbits) { - dst.ClearForWrite(vform); - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - SimFloat16 op = - static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits); - dst.SetInt(vform, i, FPToInt16(op, rounding_mode)); - } - } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - float op = src.Float<float>(i) * std::pow(2.0f, fbits); - dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); - } - } else { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - for (int i = 0; i < 
LaneCountFromFormat(vform); i++) { - double op = src.Float<double>(i) * std::pow(2.0, fbits); - dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) * + std::pow(2.0, fbits); + + switch (dst_data_size_in_bits) { + case kHRegSize: + dst.SetInt(vform, i, FPToInt16(result, round)); + break; + case kSRegSize: + dst.SetInt(vform, i, FPToInt32(result, round)); + break; + case kDRegSize: + dst.SetInt(vform, i, FPToInt64(result, round)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; } } + return dst; } +LogicVRegister Simulator::fcvts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding round, + int fbits) { + dst.ClearForWrite(vform); + return fcvts(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} LogicVRegister Simulator::fcvtu(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, - FPRounding rounding_mode, + FPRounding round, int fbits) { - dst.ClearForWrite(vform); - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - SimFloat16 op = - static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits); - dst.SetUint(vform, i, FPToUInt16(op, rounding_mode)); - } - } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - float op = src.Float<float>(i) * std::pow(2.0f, fbits); - dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); - } - } else { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - double op = src.Float<double>(i) * std::pow(2.0, fbits); - dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) * + std::pow(2.0, fbits); + + switch (dst_data_size_in_bits) { + case kHRegSize: + dst.SetUint(vform, i, FPToUInt16(result, round)); + break; + case kSRegSize: + dst.SetUint(vform, i, FPToUInt32(result, round)); + break; + case kDRegSize: + dst.SetUint(vform, i, FPToUInt64(result, round)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; } } + return dst; } +LogicVRegister Simulator::fcvtu(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding round, + int fbits) { + dst.ClearForWrite(vform); + return fcvtu(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst, @@ -5208,18 +5966,18 @@ LogicVRegister Simulator::frsqrte(VectorFormat vform, if (LaneSizeInBitsFromFormat(vform) == 
kHRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { SimFloat16 input = src.Float<SimFloat16>(i); - dst.SetFloat(i, FPRecipSqrtEstimate<SimFloat16>(input)); + dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input)); } } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float input = src.Float<float>(i); - dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); + dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double input = src.Float<double>(i); - dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); + dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input)); } } return dst; @@ -5354,18 +6112,18 @@ LogicVRegister Simulator::frecpe(VectorFormat vform, if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { SimFloat16 input = src.Float<SimFloat16>(i); - dst.SetFloat(i, FPRecipEstimate<SimFloat16>(input, round)); + dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round)); } } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float input = src.Float<float>(i); - dst.SetFloat(i, FPRecipEstimate<float>(input, round)); + dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double input = src.Float<double>(i); - dst.SetFloat(i, FPRecipEstimate<double>(input, round)); + dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round)); } } return dst; @@ -5426,6 +6184,47 @@ LogicVRegister Simulator::urecpe(VectorFormat vform, return dst; } +LogicPRegister Simulator::pfalse(LogicPRegister dst) { + dst.Clear(); + return dst; +} + +LogicPRegister Simulator::pfirst(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + int first_pg = GetFirstActive(kFormatVnB, pg); + VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB)); + mov(dst, src); + if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true); + return dst; +} + +LogicPRegister Simulator::ptrue(VectorFormat vform, + LogicPRegister dst, + int pattern) { + int count = GetPredicateConstraintLaneCount(vform, pattern); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetActive(vform, i, i < count); + } + return dst; +} + +LogicPRegister Simulator::pnext(VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + int next = GetLastActive(vform, src) + 1; + while (next < LaneCountFromFormat(vform)) { + if (pg.IsActive(vform, next)) break; + next++; + } + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetActive(vform, i, (i == next)); + } + return dst; +} + template <typename T> LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, @@ -5477,49 +6276,1143 @@ LogicVRegister Simulator::frecpx(VectorFormat vform, return dst; } +LogicVRegister Simulator::ftsmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister maybe_neg_src1; + + // The bottom bit of src2 controls the sign of the result. Use it to + // conditionally invert the sign of one `fmul` operand. 
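+ // For example, for S-sized lanes the shift amount is 31, so bit 0 of each
+ // src2 lane lands in the sign bit position; the `eor` below then flips the
+ // sign of the corresponding src1 lane exactly when that bit was set.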
+ shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1); + eor(vform, maybe_neg_src1, maybe_neg_src1, src1); + + // Multiply src1 by the modified neg_src1, which is potentially its negation. + // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1, + // rather than neg_src1, must be the first source argument. + fmul(vform, dst, src1, maybe_neg_src1); + + return dst; +} + +LogicVRegister Simulator::ftssel(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + unsigned lane_bits = LaneSizeInBitsFromFormat(vform); + uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1); + uint64_t one; + + if (lane_bits == kHRegSize) { + one = Float16ToRawbits(Float16(1.0)); + } else if (lane_bits == kSRegSize) { + one = FloatToRawbits(1.0); + } else { + VIXL_ASSERT(lane_bits == kDRegSize); + one = DoubleToRawbits(1.0); + } + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Use integer accessors for this operation, as this is a data manipulation + // task requiring no calculation. + uint64_t op = src1.Uint(vform, i); + + // Only the bottom two bits of the src2 register are significant, indicating + // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1 + // determines the sign of the value written to dst. + uint64_t q = src2.Uint(vform, i); + if ((q & 1) == 1) op = one; + if ((q & 2) == 2) op ^= sign_bit; + + dst.SetUint(vform, i, op); + } + + return dst; +} + +template <typename T> +LogicVRegister Simulator::FTMaddHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + uint64_t coeff_pos, + uint64_t coeff_neg) { + SimVRegister zero; + dup_immediate(kFormatVnB, zero, 0); + + SimVRegister cf; + SimVRegister cfn; + dup_immediate(vform, cf, coeff_pos); + dup_immediate(vform, cfn, coeff_neg); + + // The specification requires testing the top bit of the raw value, rather + // than the sign of the floating point number, so use an integer comparison + // here. 
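+ // Interpreted as a signed integer, a lane compares less than zero exactly
+ // when its top bit is set, so -0.0 and NaNs with the sign bit set also
+ // select the negative coefficient.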
+ SimPRegister is_neg; + SVEIntCompareVectorsHelper(lt, + vform, + is_neg, + GetPTrue(), + src2, + zero, + false, + LeaveFlags); + mov_merging(vform, cf, is_neg, cfn); + + SimVRegister temp; + fabs_<T>(vform, temp, src2); + fmla<T>(vform, cf, cf, src1, temp); + mov(vform, dst, cf); + return dst; +} + + +LogicVRegister Simulator::ftmad(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + unsigned index) { + static const uint64_t ftmad_coeff16[] = {0x3c00, + 0xb155, + 0x2030, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x3c00, + 0xb800, + 0x293a, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000}; + + static const uint64_t ftmad_coeff32[] = {0x3f800000, + 0xbe2aaaab, + 0x3c088886, + 0xb95008b9, + 0x36369d6d, + 0x00000000, + 0x00000000, + 0x00000000, + 0x3f800000, + 0xbf000000, + 0x3d2aaaa6, + 0xbab60705, + 0x37cd37cc, + 0x00000000, + 0x00000000, + 0x00000000}; + + static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000, + 0xbfc5555555555543, + 0x3f8111111110f30c, + 0xbf2a01a019b92fc6, + 0x3ec71de351f3d22b, + 0xbe5ae5e2b60f7b91, + 0x3de5d8408868552f, + 0x0000000000000000, + 0x3ff0000000000000, + 0xbfe0000000000000, + 0x3fa5555555555536, + 0xbf56c16c16c13a0b, + 0x3efa01a019b1e8d8, + 0xbe927e4f7282f468, + 0x3e21ee96d2641b13, + 0xbda8f76380fbb401}; + VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64)); + VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64)); + VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64)); + + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + FTMaddHelper<SimFloat16>(vform, + dst, + src1, + src2, + ftmad_coeff16[index], + ftmad_coeff16[index + 8]); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + FTMaddHelper<float>(vform, + dst, + src1, + src2, + ftmad_coeff32[index], + ftmad_coeff32[index + 8]); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + FTMaddHelper<double>(vform, + dst, + src1, + src2, + ftmad_coeff64[index], + ftmad_coeff64[index + 8]); + } + return dst; +} + +LogicVRegister Simulator::fexpa(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045, + 0x005d, 0x0075, 0x008e, 0x00a8, + 0x00c2, 0x00dc, 0x00f8, 0x0114, + 0x0130, 0x014d, 0x016b, 0x0189, + 0x01a8, 0x01c8, 0x01e8, 0x0209, + 0x022b, 0x024e, 0x0271, 0x0295, + 0x02ba, 0x02e0, 0x0306, 0x032e, + 0x0356, 0x037f, 0x03a9, 0x03d4}; + + static const uint64_t fexpa_coeff32[] = + {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f, + 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b, + 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532, + 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, + 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf, + 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75, + 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd, + 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, + 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3, + 0x7d3e0c}; + + static const uint64_t fexpa_coeff64[] = + {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8, + 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0, + 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6, + 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b, + 0X306fe0a31b715, 0X33c08b26416ff, 
0X371a7373aa9cb, 0X3a7db34e59ff7, + 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0, + 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da, + 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225, + 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9, + 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed, + 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50, + 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf, + 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2, + 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c, + 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6, + 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8}; + + unsigned lane_size = LaneSizeInBitsFromFormat(vform); + int index_highbit = 5; + int op_highbit, op_shift; + const uint64_t* fexpa_coeff; + + if (lane_size == kHRegSize) { + index_highbit = 4; + VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1))); + fexpa_coeff = fexpa_coeff16; + op_highbit = 9; + op_shift = 10; + } else if (lane_size == kSRegSize) { + VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1))); + fexpa_coeff = fexpa_coeff32; + op_highbit = 13; + op_shift = 23; + } else { + VIXL_ASSERT(lane_size == kDRegSize); + VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1))); + fexpa_coeff = fexpa_coeff64; + op_highbit = 16; + op_shift = 52; + } + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t op = src.Uint(vform, i); + uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)]; + result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift); + dst.SetUint(vform, i, result); + } + return dst; +} + +template <typename T> +LogicVRegister Simulator::fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + T two = T(2.0); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T s1 = src1.Float<T>(i); + if (!IsNaN(s1)) { + int64_t scale = src2.Int(vform, i); + // TODO: this is a low-performance implementation, but it's simple and + // less likely to be buggy. Consider replacing it with something faster. + + // Scales outside of these bounds become infinity or zero, so there's no + // point iterating further. + scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048); + + // Compute s1 * 2 ^ scale. If scale is positive, multiply by two and + // decrement scale until it's zero. + while (scale-- > 0) { + s1 = FPMul(s1, two); + } + + // If scale is negative, divide by two and increment scale until it's + // zero. Initially, scale is (src2 - 1), so we pre-increment. 
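+ // For example, if src2 held -2, the first loop leaves scale at -3, and
+ // this loop then divides twice (for scale values -2 and -1) before the
+ // test fails at zero.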
+ while (++scale < 0) { + s1 = FPDiv(s1, two); + } + } + dst.SetFloat<T>(i, s1); + } + return dst; +} + +LogicVRegister Simulator::fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + fscale<SimFloat16>(vform, dst, src1, src2); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fscale<float>(vform, dst, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fscale<double>(vform, dst, src1, src2); + } + return dst; +} + +LogicVRegister Simulator::scvtf(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + + switch (dst_data_size_in_bits) { + case kHRegSize: { + SimFloat16 result = FixedToFloat16(value, fbits, round); + dst.SetUint(vform, i, Float16ToRawbits(result)); + break; + } + case kSRegSize: { + float result = FixedToFloat(value, fbits, round); + dst.SetUint(vform, i, FloatToRawbits(result)); + break; + } + case kDRegSize: { + double result = FixedToDouble(value, fbits, round); + dst.SetUint(vform, i, DoubleToRawbits(result)); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } + } + + return dst; +} + LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, FPRounding round) { + return scvtf(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} + +LogicVRegister Simulator::ucvtf(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - SimFloat16 result = FixedToFloat16(src.Int(kFormatH, i), fbits, round); - dst.SetFloat<SimFloat16>(i, result); - } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); - dst.SetFloat<float>(i, result); - } else { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); - dst.SetFloat<double>(i, result); + if (!pg.IsActive(vform, i)) continue; + + uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + + switch (dst_data_size_in_bits) { + case kHRegSize: { + SimFloat16 result = UFixedToFloat16(value, fbits, round); + dst.SetUint(vform, i, Float16ToRawbits(result)); + break; + } + case kSRegSize: { + float result = UFixedToFloat(value, fbits, round); + dst.SetUint(vform, i, FloatToRawbits(result)); + break; + } + case kDRegSize: { + double result = UFixedToDouble(value, fbits, round); + dst.SetUint(vform, i, DoubleToRawbits(result)); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; } } + return 
dst; } - LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, FPRounding round) { + return ucvtf(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} + +LogicVRegister Simulator::unpk(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + UnpackType unpack_type, + ExtendType extend_type) { + VectorFormat vform_half = VectorFormatHalfWidth(vform); + const int lane_count = LaneCountFromFormat(vform); + const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count; + + switch (extend_type) { + case kSignedExtend: { + int64_t result[kZRegMaxSizeInBytes]; + for (int i = 0; i < lane_count; ++i) { + result[i] = src.Int(vform_half, i + src_start_lane); + } + for (int i = 0; i < lane_count; ++i) { + dst.SetInt(vform, i, result[i]); + } + break; + } + case kUnsignedExtend: { + uint64_t result[kZRegMaxSizeInBytes]; + for (int i = 0; i < lane_count; ++i) { + result[i] = src.Uint(vform_half, i + src_start_lane); + } + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + break; + } + default: + VIXL_UNREACHABLE(); + } + return dst; +} + +LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond, + VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& mask, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements, + FlagsUpdate flags) { + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + bool result = false; + if (mask.IsActive(vform, lane)) { + int64_t op1 = 0xbadbeef; + int64_t op2 = 0xbadbeef; + int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize; + switch (cond) { + case eq: + case ge: + case gt: + case lt: + case le: + case ne: + op1 = src1.Int(vform, lane); + op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane) + : src2.Int(vform, lane); + break; + case hi: + case hs: + case ls: + case lo: + op1 = src1.Uint(vform, lane); + op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane) + : src2.Uint(vform, lane); + break; + default: + VIXL_UNREACHABLE(); + } + + switch (cond) { + case eq: + result = (op1 == op2); + break; + case ne: + result = (op1 != op2); + break; + case ge: + result = (op1 >= op2); + break; + case gt: + result = (op1 > op2); + break; + case le: + result = (op1 <= op2); + break; + case lt: + result = (op1 < op2); + break; + case hs: + result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2)); + break; + case hi: + result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2)); + break; + case ls: + result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2)); + break; + case lo: + result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2)); + break; + default: + VIXL_UNREACHABLE(); + } + } + dst.SetActive(vform, lane, result); + } + + if (flags == SetFlags) PredTest(vform, mask, dst); + + return dst; +} + +LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op, + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements) { + unsigned lane_size = LaneSizeInBitsFromFormat(vform); + VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform; + + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + int shift_src_lane = lane; + if (is_wide_elements) { + // If the shift amount comes from wide elements, select the D-sized lane + // which occupies the corresponding lanes of the value to be shifted. 
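+ // For example, with B-sized lanes, lanes 0 to 7 of src1 are all shifted
+ // by the amount held in D-sized lane 0 of src2.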
+ shift_src_lane = (lane * lane_size) / kDRegSize; + } + uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane); + + // Saturate shift_amount to the size of the lane that will be shifted. + if (shift_amount > lane_size) shift_amount = lane_size; + + uint64_t value = src1.Uint(vform, lane); + int64_t result = ShiftOperand(lane_size, + value, + shift_op, + static_cast<unsigned>(shift_amount)); + dst.SetUint(vform, lane, result); + } + + return dst; +} + +LogicVRegister Simulator::asrd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + int shift) { + VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <= + LaneSizeInBitsFromFormat(vform))); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - SimFloat16 result = UFixedToFloat16(src.Uint(kFormatH, i), fbits, round); - dst.SetFloat<SimFloat16>(i, result); - } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); - dst.SetFloat<float>(i, result); + int64_t value = src1.Int(vform, i); + if (shift <= 63) { + if (value < 0) { + // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely + // cast to int64_t, and cannot cause signed overflow in the result. + value = value + GetUintMask(shift); + } + value = ShiftOperand(kDRegSize, value, ASR, shift); } else { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); - dst.SetFloat<double>(i, result); + value = 0; } + dst.SetInt(vform, i, value); } return dst; } +LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper( + LogicalOp logical_op, + VectorFormat vform, + LogicVRegister zd, + const LogicVRegister& zn, + const LogicVRegister& zm) { + VIXL_ASSERT(IsSVEFormat(vform)); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t op1 = zn.Uint(vform, i); + uint64_t op2 = zm.Uint(vform, i); + uint64_t result; + switch (logical_op) { + case AND: + result = op1 & op2; + break; + case BIC: + result = op1 & ~op2; + break; + case EOR: + result = op1 ^ op2; + break; + case ORR: + result = op1 | op2; + break; + default: + result = 0; + VIXL_UNIMPLEMENTED(); + } + zd.SetUint(vform, i, result); + } + + return zd; +} + +LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op, + LogicPRegister pd, + const LogicPRegister& pn, + const LogicPRegister& pm) { + for (int i = 0; i < pn.GetChunkCount(); i++) { + LogicPRegister::ChunkType op1 = pn.GetChunk(i); + LogicPRegister::ChunkType op2 = pm.GetChunk(i); + LogicPRegister::ChunkType result; + switch (op) { + case ANDS_p_p_pp_z: + case AND_p_p_pp_z: + result = op1 & op2; + break; + case BICS_p_p_pp_z: + case BIC_p_p_pp_z: + result = op1 & ~op2; + break; + case EORS_p_p_pp_z: + case EOR_p_p_pp_z: + result = op1 ^ op2; + break; + case NANDS_p_p_pp_z: + case NAND_p_p_pp_z: + result = ~(op1 & op2); + break; + case NORS_p_p_pp_z: + case NOR_p_p_pp_z: + result = ~(op1 | op2); + break; + case ORNS_p_p_pp_z: + case ORN_p_p_pp_z: + result = op1 | ~op2; + break; + case ORRS_p_p_pp_z: + case ORR_p_p_pp_z: + result = op1 | op2; + break; + default: + result = 0; + VIXL_UNIMPLEMENTED(); + } + pd.SetChunk(i, result); + } + return pd; +} + +LogicVRegister Simulator::SVEBitwiseImmHelper( + SVEBitwiseLogicalWithImm_UnpredicatedOp op, + VectorFormat vform, + LogicVRegister zd, + uint64_t imm) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t op1 = zd.Uint(vform, i); + uint64_t 
result;
+ switch (op) {
+ case AND_z_zi:
+ result = op1 & imm;
+ break;
+ case EOR_z_zi:
+ result = op1 ^ imm;
+ break;
+ case ORR_z_zi:
+ result = op1 | imm;
+ break;
+ default:
+ result = 0;
+ VIXL_UNIMPLEMENTED();
+ }
+ zd.SetUint(vform, i, result);
+ }
+
+ return zd;
+}
+
+void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
+ const LogicPRegister& pg,
+ unsigned zt_code,
+ const LogicSVEAddressVector& addr) {
+ VIXL_ASSERT(zt_code < kNumberOfZRegisters);
+
+ int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
+ int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
+ int msize_in_bytes = addr.GetMsizeInBytes();
+ int reg_count = addr.GetRegCount();
+
+ VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
+ VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
+
+ unsigned zt_codes[4] = {zt_code,
+ (zt_code + 1) % kNumberOfZRegisters,
+ (zt_code + 2) % kNumberOfZRegisters,
+ (zt_code + 3) % kNumberOfZRegisters};
+
+ LogicVRegister zt[4] = {
+ ReadVRegister(zt_codes[0]),
+ ReadVRegister(zt_codes[1]),
+ ReadVRegister(zt_codes[2]),
+ ReadVRegister(zt_codes[3]),
+ };
+
+ // For unpacked forms (e.g. `st1b { z0.h }, ...`), the upper parts of the lanes
+ // are ignored, so read the source register using the VectorFormat that
+ // corresponds with the storage format, and multiply the index accordingly.
+ VectorFormat unpack_vform =
+ SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
+ int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ for (int r = 0; r < reg_count; r++) {
+ uint64_t element_address = addr.GetElementAddress(i, r);
+ zt[r].WriteUintToMem(unpack_vform, i << unpack_shift, element_address);
+ }
+ }
+
+ if (ShouldTraceWrites()) {
+ PrintRegisterFormat format = GetPrintRegisterFormat(vform);
+ if (esize_in_bytes_log2 == msize_in_bytes_log2) {
+ // Use an FP format where it's likely that we're accessing FP data.
+ format = GetPrintRegisterFormatTryFP(format);
+ }
+ // Stores don't represent a change to the source register's value, so only
+ // print the relevant part of the value.
+ format = GetPrintRegPartial(format); + + PrintZStructAccess(zt_code, + reg_count, + pg, + format, + msize_in_bytes, + "->", + addr); + } +} + +void Simulator::SVEStructuredLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + bool is_signed) { + int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2(); + int msize_in_bytes = addr.GetMsizeInBytes(); + int reg_count = addr.GetRegCount(); + + VIXL_ASSERT(zt_code < kNumberOfZRegisters); + VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); + VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4)); + + unsigned zt_codes[4] = {zt_code, + (zt_code + 1) % kNumberOfZRegisters, + (zt_code + 2) % kNumberOfZRegisters, + (zt_code + 3) % kNumberOfZRegisters}; + LogicVRegister zt[4] = { + ReadVRegister(zt_codes[0]), + ReadVRegister(zt_codes[1]), + ReadVRegister(zt_codes[2]), + ReadVRegister(zt_codes[3]), + }; + + VectorFormat unpack_vform = + SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + for (int r = 0; r < reg_count; r++) { + uint64_t element_address = addr.GetElementAddress(i, r); + + if (!pg.IsActive(vform, i)) { + zt[r].SetUint(vform, i, 0); + continue; + } + + if (is_signed) { + zt[r].ReadIntFromMem(vform, + LaneSizeInBitsFromFormat(unpack_vform), + i, + element_address); + + } else { + zt[r].ReadUintFromMem(vform, + LaneSizeInBitsFromFormat(unpack_vform), + i, + element_address); + } + } + } + + if (ShouldTraceVRegs()) { + PrintRegisterFormat format = GetPrintRegisterFormat(vform); + if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) { + // Use an FP format where it's likely that we're accessing FP data. 
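
SVEStructuredLoadHelper above zeroes lanes whose governing-predicate bit is clear, and sign- or zero-extends each loaded element from the memory size up to the vector lane size. A simplified, self-contained model of that per-lane behaviour, using plain arrays as stand-ins for the simulator's register and predicate types:

    #include <cstdint>

    // Model of a predicated ld1sb into .S lanes: inactive lanes are zeroed,
    // active lanes load one byte and sign-extend it to 32 bits.
    void Ld1sbModel(int32_t* z, const bool* pg, const int8_t* mem,
                    int lane_count) {
      for (int i = 0; i < lane_count; i++) {
        z[i] = pg[i] ? static_cast<int32_t>(mem[i]) : 0;
      }
    }
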
+ format = GetPrintRegisterFormatTryFP(format); + } + PrintZStructAccess(zt_code, + reg_count, + pg, + format, + msize_in_bytes, + "<-", + addr); + } +} + +LogicPRegister Simulator::brka(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn) { + bool break_ = false; + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + if (pg.IsActive(kFormatVnB, i)) { + pd.SetActive(kFormatVnB, i, !break_); + break_ |= pn.IsActive(kFormatVnB, i); + } + } + + return pd; +} + +LogicPRegister Simulator::brkb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn) { + bool break_ = false; + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + if (pg.IsActive(kFormatVnB, i)) { + break_ |= pn.IsActive(kFormatVnB, i); + pd.SetActive(kFormatVnB, i, !break_); + } + } + + return pd; +} + +LogicPRegister Simulator::brkn(LogicPRegister pdm, + const LogicPRegister& pg, + const LogicPRegister& pn) { + if (!IsLastActive(kFormatVnB, pg, pn)) { + pfalse(pdm); + } + return pdm; +} + +LogicPRegister Simulator::brkpa(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm) { + bool last_active = IsLastActive(kFormatVnB, pg, pn); + + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + bool active = false; + if (pg.IsActive(kFormatVnB, i)) { + active = last_active; + last_active = last_active && !pm.IsActive(kFormatVnB, i); + } + pd.SetActive(kFormatVnB, i, active); + } + + return pd; +} + +LogicPRegister Simulator::brkpb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm) { + bool last_active = IsLastActive(kFormatVnB, pg, pn); + + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + bool active = false; + if (pg.IsActive(kFormatVnB, i)) { + last_active = last_active && !pm.IsActive(kFormatVnB, i); + active = last_active; + } + pd.SetActive(kFormatVnB, i, active); + } + + return pd; +} + +void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + SVEFaultTolerantLoadType type, + bool is_signed) { + int esize_in_bytes = LaneSizeInBytesFromFormat(vform); + int msize_in_bits = addr.GetMsizeInBits(); + int msize_in_bytes = addr.GetMsizeInBytes(); + + VIXL_ASSERT(zt_code < kNumberOfZRegisters); + VIXL_ASSERT(esize_in_bytes >= msize_in_bytes); + VIXL_ASSERT(addr.GetRegCount() == 1); + + LogicVRegister zt = ReadVRegister(zt_code); + LogicPRegister ffr = ReadFFR(); + + // Non-faulting loads are allowed to fail arbitrarily. To stress user + // code, fail a random element in roughly one in eight full-vector loads. + uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_)); + int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = 0; + + if (pg.IsActive(vform, i)) { + uint64_t element_address = addr.GetElementAddress(i, 0); + + if (type == kSVEFirstFaultLoad) { + // First-faulting loads always load the first active element, regardless + // of FFR. The result will be discarded if its FFR lane is inactive, but + // it could still generate a fault. + value = Memory::Read(msize_in_bytes, element_address); + // All subsequent elements have non-fault semantics. 
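
The break-predicate helpers `brka` and `brkb` above differ only in whether the lane that triggers the break stays active itself: break-after (`brka`) updates the break flag after writing the destination lane, break-before (`brkb`) updates it first. A standalone model over plain bool arrays (not the simulator's predicate types):

    // Model of BRKA/BRKB: lanes are active until pn signals a break; with
    // break-after (BRKA) the signalling lane itself stays active, with
    // break-before (BRKB) it does not. Lanes where pg is inactive are
    // skipped, as in the helpers above.
    void BrkModel(bool* pd, const bool* pg, const bool* pn, int lane_count,
                  bool break_before) {
      bool broken = false;
      for (int i = 0; i < lane_count; i++) {
        if (!pg[i]) continue;
        if (break_before) broken = broken || pn[i];
        pd[i] = !broken;
        if (!break_before) broken = broken || pn[i];
      }
    }
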
+ type = kSVENonFaultLoad; + + } else if (ffr.IsActive(vform, i)) { + // Simulation of fault-tolerant loads relies on system calls, and is + // likely to be relatively slow, so we only actually perform the load if + // its FFR lane is active. + + bool can_read = (i < fake_fault_at_lane) && + CanReadMemory(element_address, msize_in_bytes); + if (can_read) { + value = Memory::Read(msize_in_bytes, element_address); + } else { + // Propagate the fault to the end of FFR. + for (int j = i; j < LaneCountFromFormat(vform); j++) { + ffr.SetActive(vform, j, false); + } + } + } + } + + // The architecture permits a few possible results for inactive FFR lanes + // (including those caused by a fault in this instruction). We choose to + // leave the register value unchanged (like merging predication) because + // no other input to this instruction can have the same behaviour. + // + // Note that this behaviour takes precedence over pg's zeroing predication. + + if (ffr.IsActive(vform, i)) { + int msb = msize_in_bits - 1; + if (is_signed) { + zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value)); + } else { + zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value)); + } + } + } + + if (ShouldTraceVRegs()) { + PrintRegisterFormat format = GetPrintRegisterFormat(vform); + if ((esize_in_bytes == msize_in_bytes) && !is_signed) { + // Use an FP format where it's likely that we're accessing FP data. + format = GetPrintRegisterFormatTryFP(format); + } + // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess + // expects a single mask, so combine the two predicates. + SimPRegister mask; + SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr); + PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr); + } +} + +void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr, + VectorFormat vform, + SVEOffsetModifier mod) { + bool is_signed = instr->ExtractBit(14) == 0; + bool is_ff = instr->ExtractBit(13) == 1; + // Note that these instructions don't use the Dtype encoding. + int msize_in_bytes_log2 = instr->ExtractBits(24, 23); + int scale = instr->ExtractBit(21) * msize_in_bytes_log2; + uint64_t base = ReadXRegister(instr->GetRn()); + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + vform, + mod, + scale); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + if (is_ff) { + SVEFaultTolerantLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + kSVEFirstFaultLoad, + is_signed); + } else { + SVEStructuredLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + is_signed); + } +} + +int Simulator::GetFirstActive(VectorFormat vform, + const LogicPRegister& pg) const { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (pg.IsActive(vform, i)) return i; + } + return -1; +} + +int Simulator::GetLastActive(VectorFormat vform, + const LogicPRegister& pg) const { + for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { + if (pg.IsActive(vform, i)) return i; + } + return -1; +} + +int Simulator::CountActiveLanes(VectorFormat vform, + const LogicPRegister& pg) const { + int count = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + count += pg.IsActive(vform, i) ? 1 : 0; + } + return count; +} + +int Simulator::CountActiveAndTrueLanes(VectorFormat vform, + const LogicPRegister& pg, + const LogicPRegister& pn) const { + int count = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 
1 : 0;
+  }
+  return count;
+}
+
+int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
+                                               int pattern) const {
+  VIXL_ASSERT(IsSVEFormat(vform));
+  int all = LaneCountFromFormat(vform);
+  VIXL_ASSERT(all > 0);
+
+  switch (pattern) {
+    case SVE_VL1:
+    case SVE_VL2:
+    case SVE_VL3:
+    case SVE_VL4:
+    case SVE_VL5:
+    case SVE_VL6:
+    case SVE_VL7:
+    case SVE_VL8:
+      // VL1-VL8 are encoded directly.
+      VIXL_STATIC_ASSERT(SVE_VL1 == 1);
+      VIXL_STATIC_ASSERT(SVE_VL8 == 8);
+      return (pattern <= all) ? pattern : 0;
+    case SVE_VL16:
+    case SVE_VL32:
+    case SVE_VL64:
+    case SVE_VL128:
+    case SVE_VL256: {
+      // VL16-VL256 are encoded as log2(N) + c.
+      int min = 16 << (pattern - SVE_VL16);
+      return (min <= all) ? min : 0;
+    }
+    // Special cases.
+    case SVE_POW2:
+      return 1 << HighestSetBitPosition(all);
+    case SVE_MUL4:
+      return all - (all % 4);
+    case SVE_MUL3:
+      return all - (all % 3);
+    case SVE_ALL:
+      return all;
+  }
+  // Unnamed cases architecturally return 0.
+  return 0;
+}
+
+uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
+  if (IsContiguous()) {
+    return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
+  }
+
+  VIXL_ASSERT(IsScatterGather());
+  VIXL_ASSERT(vector_ != NULL);
+
+  // For scatter-gather accesses, we need to extract the offset from vector_,
+  // and apply modifiers.
+
+  uint64_t offset = 0;
+  switch (vector_form_) {
+    case kFormatVnS:
+      offset = vector_->GetLane<uint32_t>(lane);
+      break;
+    case kFormatVnD:
+      offset = vector_->GetLane<uint64_t>(lane);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+
+  switch (vector_mod_) {
+    case SVE_MUL_VL:
+      VIXL_UNIMPLEMENTED();
+      break;
+    case SVE_LSL:
+      // We apply the shift below. There's nothing to do here.
+      break;
+    case NO_SVE_OFFSET_MODIFIER:
+      VIXL_ASSERT(vector_shift_ == 0);
+      break;
+    case SVE_UXTW:
+      offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
+      break;
+    case SVE_SXTW:
+      offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
+      break;
+  }
+
+  return base_ + (offset << vector_shift_);
+}
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/src/aarch64/macro-assembler-aarch64.cc b/src/aarch64/macro-assembler-aarch64.cc
index 85954fc9..56c6eaf6 100644
--- a/src/aarch64/macro-assembler-aarch64.cc
+++ b/src/aarch64/macro-assembler-aarch64.cc
@@ -65,7 +65,7 @@ LiteralPool::~LiteralPool() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
 
 void LiteralPool::Reset() {
-  std::vector<RawLiteral *>::iterator it, end;
+  std::vector<RawLiteral*>::iterator it, end;
   for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
     RawLiteral* literal = *it;
     if (literal->deletion_policy_ == RawLiteral::kDeletedOnPlacementByPool) {
@@ -145,7 +145,7 @@ void LiteralPool::Emit(EmitOption option) {
   }
 
   // Now populate the literal pool.
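
As a worked example of GetPredicateConstraintLaneCount above, take a 256-bit vector of .S elements, so `all` is 8: SVE_VL7 returns 7, SVE_VL16 returns 0 (16 lanes do not fit), SVE_POW2 returns 8, SVE_MUL3 returns 6, and SVE_ALL returns 8. The special-case arms reduce to:

    // The "special" pattern cases from the helper above, for a vector
    // holding `all` elements.
    int Pow2Pattern(int all) {  // SVE_POW2: largest power of two <= all.
      int n = 1;
      while ((n << 1) <= all) n <<= 1;
      return n;
    }
    int Mul3Pattern(int all) { return all - (all % 3); }  // SVE_MUL3
    int Mul4Pattern(int all) { return all - (all % 4); }  // SVE_MUL4
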
-  std::vector<RawLiteral *>::iterator it, end;
+  std::vector<RawLiteral*>::iterator it, end;
   for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
     VIXL_ASSERT((*it)->IsUsed());
     masm_->place(*it);
@@ -321,11 +321,13 @@ MacroAssembler::MacroAssembler(PositionIndependentCodeOption pic)
       generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
       sp_(sp),
       tmp_list_(ip0, ip1),
-      fptmp_list_(d31),
+      v_tmp_list_(d31),
+      p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
       current_scratch_scope_(NULL),
       literal_pool_(this),
       veneer_pool_(this),
-      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
+      recommended_checkpoint_(Pool::kNoCheckpointRequired),
+      fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
   checkpoint_ = GetNextCheckPoint();
 #ifndef VIXL_DEBUG
   USE(allow_macro_instructions_);
 #endif
 }
@@ -342,11 +344,13 @@ MacroAssembler::MacroAssembler(size_t capacity,
       generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
       sp_(sp),
       tmp_list_(ip0, ip1),
-      fptmp_list_(d31),
+      v_tmp_list_(d31),
+      p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
       current_scratch_scope_(NULL),
       literal_pool_(this),
       veneer_pool_(this),
-      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
+      recommended_checkpoint_(Pool::kNoCheckpointRequired),
+      fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
   checkpoint_ = GetNextCheckPoint();
 }
@@ -361,11 +365,13 @@ MacroAssembler::MacroAssembler(byte* buffer,
       generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
       sp_(sp),
       tmp_list_(ip0, ip1),
-      fptmp_list_(d31),
+      v_tmp_list_(d31),
+      p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
       current_scratch_scope_(NULL),
       literal_pool_(this),
       veneer_pool_(this),
-      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
+      recommended_checkpoint_(Pool::kNoCheckpointRequired),
+      fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
   checkpoint_ = GetNextCheckPoint();
 }
@@ -819,6 +825,12 @@ void MacroAssembler::LogicalMacro(const Register& rd,
   // * 1 instruction to move to sp
   MacroEmissionCheckScope guard(this);
   UseScratchRegisterScope temps(this);
+  // Use `rd` as a temp, if we can.
+  temps.Include(rd);
+  // We read `rn` after evaluating `operand`.
+  temps.Exclude(rn);
+  // It doesn't matter if `operand` is in `temps` (e.g. because it aliases `rd`)
+  // because we don't need it after it is evaluated.
 
   if (operand.IsImmediate()) {
     uint64_t immediate = operand.GetImmediate();
@@ -886,6 +898,7 @@ } else {
        // Immediate can't be encoded: synthesize using move immediate.
        Register temp = temps.AcquireSameSizeAs(rn);
+       VIXL_ASSERT(!temp.Aliases(rn));
 
        // If the left-hand input is the stack pointer, we can't pre-shift the
        // immediate, as the encoding won't allow the subsequent post shift.
@@ -910,8 +923,8 @@
        operand.GetRegister().Is64Bits() ||
        ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));
 
-    temps.Exclude(operand.GetRegister());
     Register temp = temps.AcquireSameSizeAs(rn);
+    VIXL_ASSERT(!temp.Aliases(rn));
     EmitExtendShift(temp,
                     operand.GetRegister(),
                     operand.GetExtend(),
@@ -1139,17 +1152,13 @@ void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
     // Call the macro assembler for generic immediates.
     Mvn(rd, operand.GetImmediate());
   } else if (operand.IsExtendedRegister()) {
-    UseScratchRegisterScope temps(this);
-    temps.Exclude(operand.GetRegister());
-
     // Emit two instructions for the extend case. This differs from Mov, as
     // the extend and invert can't be achieved in one instruction.
-    Register temp = temps.AcquireSameSizeAs(rd);
-    EmitExtendShift(temp,
+    EmitExtendShift(rd,
                     operand.GetRegister(),
                     operand.GetExtend(),
                     operand.GetShiftAmount());
-    mvn(rd, Operand(temp));
+    mvn(rd, rd);
   } else {
     // Otherwise, register and shifted register cases can be handled by the
     // assembler directly, using orn.
@@ -1418,12 +1427,15 @@ void MacroAssembler::Add(const Register& rd,
                          const Operand& operand,
                          FlagsUpdate S) {
   VIXL_ASSERT(allow_macro_instructions_);
-  if (operand.IsImmediate() && (operand.GetImmediate() < 0) &&
-      IsImmAddSub(-operand.GetImmediate())) {
-    AddSubMacro(rd, rn, -operand.GetImmediate(), S, SUB);
-  } else {
-    AddSubMacro(rd, rn, operand, S, ADD);
+  if (operand.IsImmediate()) {
+    int64_t imm = operand.GetImmediate();
+    if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
+        IsImmAddSub(-imm)) {
+      AddSubMacro(rd, rn, -imm, S, SUB);
+      return;
+    }
   }
+  AddSubMacro(rd, rn, operand, S, ADD);
 }
@@ -1439,12 +1451,15 @@ void MacroAssembler::Sub(const Register& rd,
                          const Operand& operand,
                          FlagsUpdate S) {
   VIXL_ASSERT(allow_macro_instructions_);
-  if (operand.IsImmediate() && (operand.GetImmediate() < 0) &&
-      IsImmAddSub(-operand.GetImmediate())) {
-    AddSubMacro(rd, rn, -operand.GetImmediate(), S, ADD);
-  } else {
-    AddSubMacro(rd, rn, operand, S, SUB);
+  if (operand.IsImmediate()) {
+    int64_t imm = operand.GetImmediate();
+    if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
+        IsImmAddSub(-imm)) {
+      AddSubMacro(rd, rn, -imm, S, ADD);
+      return;
+    }
   }
+  AddSubMacro(rd, rn, operand, S, SUB);
 }
@@ -1757,6 +1772,12 @@ void MacroAssembler::AddSubMacro(const Register& rd,
       (rn.IsZero() && !operand.IsShiftedRegister()) ||
       (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
     UseScratchRegisterScope temps(this);
+    // Use `rd` as a temp, if we can.
+    temps.Include(rd);
+    // We read `rn` after evaluating `operand`.
+    temps.Exclude(rn);
+    // It doesn't matter if `operand` is in `temps` (e.g. because it aliases
+    // `rd`) because we don't need it after it is evaluated.
     Register temp = temps.AcquireSameSizeAs(rn);
     if (operand.IsImmediate()) {
       PreShiftImmMode mode = kAnyShift;
@@ -1842,6 +1863,12 @@ void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
   // * 1 instruction for add/sub
   MacroEmissionCheckScope guard(this);
   UseScratchRegisterScope temps(this);
+  // Use `rd` as a temp, if we can.
+  temps.Include(rd);
+  // We read `rn` after evaluating `operand`.
+  temps.Exclude(rn);
+  // It doesn't matter if `operand` is in `temps` (e.g. because it aliases `rd`)
+  // because we don't need it after it is evaluated.
 
   if (operand.IsImmediate() ||
       (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
@@ -1856,7 +1883,6 @@
     VIXL_ASSERT(
         IsUintN(rd.GetSizeInBits() == kXRegSize ?
kXRegSizeLog2 : kWRegSizeLog2, operand.GetShiftAmount())); - temps.Exclude(operand.GetRegister()); Register temp = temps.AcquireSameSizeAs(rn); EmitShift(temp, operand.GetRegister(), @@ -1872,7 +1898,6 @@ void MacroAssembler::AddSubWithCarryMacro(const Register& rd, VIXL_ASSERT( operand.GetRegister().Is64Bits() || ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX))); - temps.Exclude(operand.GetRegister()); Register temp = temps.AcquireSameSizeAs(rn); EmitExtendShift(temp, operand.GetRegister(), @@ -2397,7 +2422,8 @@ void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op, // We do not handle pre-indexing or post-indexing. VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex())); VIXL_ASSERT(!registers.Overlaps(tmp_list_)); - VIXL_ASSERT(!registers.Overlaps(fptmp_list_)); + VIXL_ASSERT(!registers.Overlaps(v_tmp_list_)); + VIXL_ASSERT(!registers.Overlaps(p_tmp_list_)); VIXL_ASSERT(!registers.IncludesAliasOf(sp)); UseScratchRegisterScope temps(this); @@ -2481,7 +2507,7 @@ void MacroAssembler::BumpSystemStackPointer(const Operand& space) { } -// TODO(all): Fix printf for NEON registers. +// TODO(all): Fix printf for NEON and SVE registers. // This is the main Printf implementation. All callee-saved registers are // preserved, but NZCV and the caller-saved registers may be clobbered. @@ -2764,32 +2790,6 @@ void MacroAssembler::Log(TraceParameters parameters) { } -void MacroAssembler::EnableInstrumentation() { - VIXL_ASSERT(!isprint(InstrumentStateEnable)); - ExactAssemblyScope scope(this, kInstructionSize); - movn(xzr, InstrumentStateEnable); -} - - -void MacroAssembler::DisableInstrumentation() { - VIXL_ASSERT(!isprint(InstrumentStateDisable)); - ExactAssemblyScope scope(this, kInstructionSize); - movn(xzr, InstrumentStateDisable); -} - - -void MacroAssembler::AnnotateInstrumentation(const char* marker_name) { - VIXL_ASSERT(strlen(marker_name) == 2); - - // We allow only printable characters in the marker names. Unprintable - // characters are reserved for controlling features of the instrumentation. 
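
The reworked Add and Sub macros earlier in this file only rewrite a negative immediate as the opposite operation when the negation is safe; the `imm != std::numeric_limits<int64_t>::min()` test exists because -INT64_MIN is not representable in int64_t, so negating it would overflow. The guard in isolation:

    #include <cstdint>
    #include <limits>

    // True if `Add(rd, rn, imm)` may be rewritten as `Sub(rd, rn, -imm)` (and
    // vice versa): the immediate must be negative and safely negatable. The
    // macros additionally require IsImmAddSub(-imm) to hold.
    bool CanUseNegatedImmediate(int64_t imm) {
      return (imm < 0) && (imm != std::numeric_limits<int64_t>::min());
    }
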
-  VIXL_ASSERT(isprint(marker_name[0]) && isprint(marker_name[1]));
-
-  ExactAssemblyScope scope(this, kInstructionSize);
-  movn(xzr, (marker_name[1] << 8) | marker_name[0]);
-}
-
-
 void MacroAssembler::SetSimulatorCPUFeatures(const CPUFeatures& features) {
   ConfigureSimulatorCPUFeaturesHelper(features, kSetCPUFeaturesOpcode);
 }
@@ -2870,10 +2870,13 @@ void UseScratchRegisterScope::Open(MacroAssembler* masm) {
   CPURegList* available = masm->GetScratchRegisterList();
   CPURegList* available_v = masm->GetScratchVRegisterList();
+  CPURegList* available_p = masm->GetScratchPRegisterList();
   old_available_ = available->GetList();
   old_available_v_ = available_v->GetList();
+  old_available_p_ = available_p->GetList();
   VIXL_ASSERT(available->GetType() == CPURegister::kRegister);
   VIXL_ASSERT(available_v->GetType() == CPURegister::kVRegister);
+  VIXL_ASSERT(available_p->GetType() == CPURegister::kPRegister);
 
   parent_ = masm->GetCurrentScratchRegisterScope();
   masm->SetCurrentScratchRegisterScope(this);
@@ -2891,6 +2894,7 @@ void UseScratchRegisterScope::Close() {
   masm_->GetScratchRegisterList()->SetList(old_available_);
   masm_->GetScratchVRegisterList()->SetList(old_available_v_);
+  masm_->GetScratchPRegisterList()->SetList(old_available_p_);
 
   masm_ = NULL;
 }
@@ -2899,44 +2903,46 @@ bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const {
   return masm_->GetScratchRegisterList()->IncludesAliasOf(reg) ||
-         masm_->GetScratchVRegisterList()->IncludesAliasOf(reg);
+         masm_->GetScratchVRegisterList()->IncludesAliasOf(reg) ||
+         masm_->GetScratchPRegisterList()->IncludesAliasOf(reg);
 }
 
-
 Register UseScratchRegisterScope::AcquireRegisterOfSize(int size_in_bits) {
-  int code = AcquireNextAvailable(masm_->GetScratchRegisterList()).GetCode();
+  int code = AcquireFrom(masm_->GetScratchRegisterList()).GetCode();
   return Register(code, size_in_bits);
 }
 
 VRegister UseScratchRegisterScope::AcquireVRegisterOfSize(int size_in_bits) {
-  int code = AcquireNextAvailable(masm_->GetScratchVRegisterList()).GetCode();
+  int code = AcquireFrom(masm_->GetScratchVRegisterList()).GetCode();
   return VRegister(code, size_in_bits);
 }
 
 void UseScratchRegisterScope::Release(const CPURegister& reg) {
   VIXL_ASSERT(masm_ != NULL);
-  if (reg.IsRegister()) {
-    ReleaseByCode(masm_->GetScratchRegisterList(), reg.GetCode());
-  } else if (reg.IsVRegister()) {
-    ReleaseByCode(masm_->GetScratchVRegisterList(), reg.GetCode());
-  } else {
-    VIXL_ASSERT(reg.IsNone());
-  }
+
+  // Release(NoReg) has no effect.
+  if (reg.IsNone()) return;
+
+  ReleaseByCode(GetAvailableListFor(reg.GetBank()), reg.GetCode());
 }
 
 void UseScratchRegisterScope::Include(const CPURegList& list) {
   VIXL_ASSERT(masm_ != NULL);
+
+  // Including an empty list has no effect.
+  if (list.IsEmpty()) return;
+  VIXL_ASSERT(list.GetType() != CPURegister::kNoRegister);
+
+  RegList reg_list = list.GetList();
   if (list.GetType() == CPURegister::kRegister) {
     // Make sure that neither sp nor xzr are included in the list.
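
For context, here is how the scratch-register machinery being extended above is typically used: within a scope, temporaries are borrowed from the masm's scratch lists and handed back automatically. A usage sketch (a hypothetical snippet built from the public VIXL names shown in this diff):

    void Example(vixl::aarch64::MacroAssembler* masm) {
      vixl::aarch64::UseScratchRegisterScope temps(masm);
      // Borrow a 64-bit scratch register from the masm's scratch list.
      vixl::aarch64::Register scratch = temps.AcquireX();
      masm->Mov(scratch, 42);
    }  // `scratch` is returned to the scratch list when `temps` closes.
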
- IncludeByRegList(masm_->GetScratchRegisterList(), - list.GetList() & ~(xzr.GetBit() | sp.GetBit())); - } else { - VIXL_ASSERT(list.GetType() == CPURegister::kVRegister); - IncludeByRegList(masm_->GetScratchVRegisterList(), list.GetList()); + reg_list &= ~(xzr.GetBit() | sp.GetBit()); } + + IncludeByRegList(GetAvailableListFor(list.GetBank()), reg_list); } @@ -2964,13 +2970,43 @@ void UseScratchRegisterScope::Include(const VRegister& reg1, } -void UseScratchRegisterScope::Exclude(const CPURegList& list) { - if (list.GetType() == CPURegister::kRegister) { - ExcludeByRegList(masm_->GetScratchRegisterList(), list.GetList()); - } else { - VIXL_ASSERT(list.GetType() == CPURegister::kVRegister); - ExcludeByRegList(masm_->GetScratchVRegisterList(), list.GetList()); +void UseScratchRegisterScope::Include(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4) { + RegList include = 0; + RegList include_v = 0; + RegList include_p = 0; + + const CPURegister regs[] = {reg1, reg2, reg3, reg4}; + + for (size_t i = 0; i < ArrayLength(regs); i++) { + RegList bit = regs[i].GetBit(); + switch (regs[i].GetBank()) { + case CPURegister::kNoRegisterBank: + // Include(NoReg) has no effect. + VIXL_ASSERT(regs[i].IsNone()); + break; + case CPURegister::kRRegisterBank: + include |= bit; + break; + case CPURegister::kVRegisterBank: + include_v |= bit; + break; + case CPURegister::kPRegisterBank: + include_p |= bit; + break; + } } + + IncludeByRegList(masm_->GetScratchRegisterList(), include); + IncludeByRegList(masm_->GetScratchVRegisterList(), include_v); + IncludeByRegList(masm_->GetScratchPRegisterList(), include_p); +} + + +void UseScratchRegisterScope::Exclude(const CPURegList& list) { + ExcludeByRegList(GetAvailableListFor(list.GetBank()), list.GetList()); } @@ -2988,9 +3024,9 @@ void UseScratchRegisterScope::Exclude(const VRegister& reg1, const VRegister& reg2, const VRegister& reg3, const VRegister& reg4) { - RegList excludefp = + RegList exclude_v = reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit(); - ExcludeByRegList(masm_->GetScratchVRegisterList(), excludefp); + ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v); } @@ -2999,22 +3035,33 @@ void UseScratchRegisterScope::Exclude(const CPURegister& reg1, const CPURegister& reg3, const CPURegister& reg4) { RegList exclude = 0; - RegList excludefp = 0; + RegList exclude_v = 0; + RegList exclude_p = 0; const CPURegister regs[] = {reg1, reg2, reg3, reg4}; for (size_t i = 0; i < ArrayLength(regs); i++) { - if (regs[i].IsRegister()) { - exclude |= regs[i].GetBit(); - } else if (regs[i].IsVRegister()) { - excludefp |= regs[i].GetBit(); - } else { - VIXL_ASSERT(regs[i].IsNone()); + RegList bit = regs[i].GetBit(); + switch (regs[i].GetBank()) { + case CPURegister::kNoRegisterBank: + // Exclude(NoReg) has no effect. 
+        VIXL_ASSERT(regs[i].IsNone());
+        break;
+      case CPURegister::kRRegisterBank:
+        exclude |= bit;
+        break;
+      case CPURegister::kVRegisterBank:
+        exclude_v |= bit;
+        break;
+      case CPURegister::kPRegisterBank:
+        exclude_p |= bit;
+        break;
     }
   }
 
   ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
-  ExcludeByRegList(masm_->GetScratchVRegisterList(), excludefp);
+  ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v);
+  ExcludeByRegList(masm_->GetScratchPRegisterList(), exclude_p);
 }
@@ -3023,13 +3070,15 @@ void UseScratchRegisterScope::ExcludeAll() {
                    masm_->GetScratchRegisterList()->GetList());
   ExcludeByRegList(masm_->GetScratchVRegisterList(),
                    masm_->GetScratchVRegisterList()->GetList());
+  ExcludeByRegList(masm_->GetScratchPRegisterList(),
+                   masm_->GetScratchPRegisterList()->GetList());
 }
 
-CPURegister UseScratchRegisterScope::AcquireNextAvailable(
-    CPURegList* available) {
-  VIXL_CHECK(!available->IsEmpty());
-  CPURegister result = available->PopLowestIndex();
+CPURegister UseScratchRegisterScope::AcquireFrom(CPURegList* available,
+                                                 RegList mask) {
+  VIXL_CHECK((available->GetList() & mask) != 0);
+  CPURegister result = available->PopLowestIndex(mask);
   VIXL_ASSERT(!AreAliased(result, xzr, sp));
   return result;
 }
@@ -3057,5 +3106,22 @@ void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available,
   available->SetList(available->GetList() & ~exclude);
 }
 
+CPURegList* UseScratchRegisterScope::GetAvailableListFor(
+    CPURegister::RegisterBank bank) {
+  switch (bank) {
+    case CPURegister::kNoRegisterBank:
+      return NULL;
+    case CPURegister::kRRegisterBank:
+      return masm_->GetScratchRegisterList();
+    case CPURegister::kVRegisterBank:
+      return masm_->GetScratchVRegisterList();
+    case CPURegister::kPRegisterBank:
+      return masm_->GetScratchPRegisterList();
+  }
+  VIXL_UNREACHABLE();
+  return NULL;
+}
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 31db8dab..8becddbb 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -35,7 +35,6 @@
 #include "../macro-assembler-interface.h"
 
 #include "assembler-aarch64.h"
-#include "instrument-aarch64.h"
 
 // Required for runtime call support.
 // TODO: Break this dependency. We should be able to separate out the necessary
 // parts so that we don't need to include the whole simulator header.
@@ -61,7 +60,7 @@
 #define LSPAIR_MACRO_LIST(V)                             \
   V(Ldp, CPURegister&, rt, rt2, LoadPairOpFor(rt, rt2))  \
   V(Stp, CPURegister&, rt, rt2, StorePairOpFor(rt, rt2)) \
-  V(Ldpsw, CPURegister&, rt, rt2, LDPSW_x)
+  V(Ldpsw, Register&, rt, rt2, LDPSW_x)
 
 namespace vixl {
 namespace aarch64 {
@@ -528,6 +527,57 @@ class MacroEmissionCheckScope : public EmissionCheckScope {
 };
 
+// This scope simplifies the handling of the SVE `movprfx` instruction.
+//
+// If dst.Aliases(src):
+// - Start an ExactAssemblyScope(masm, kInstructionSize).
+// Otherwise:
+// - Start an ExactAssemblyScope(masm, 2 * kInstructionSize).
+// - Generate a suitable `movprfx` instruction.
+//
+// In both cases, the ExactAssemblyScope is left with enough remaining space for
+// exactly one destructive instruction.
+class MovprfxHelperScope : public ExactAssemblyScope {
+ public:
+  inline MovprfxHelperScope(MacroAssembler* masm,
+                            const ZRegister& dst,
+                            const ZRegister& src);
+
+  inline MovprfxHelperScope(MacroAssembler* masm,
+                            const ZRegister& dst,
+                            const PRegister& pg,
+                            const ZRegister& src);
+
+  // TODO: Implement constructors that examine _all_ sources. If `dst` aliases
+  // any other source register, we can't use `movprfx`. This isn't obviously
+  // useful, but the MacroAssembler should not generate invalid code for it.
+  // Valid behaviour can be implemented using `mov`.
+  //
+  // The best way to handle this in an instruction-agnostic way is probably to
+  // use variadic templates.
+
+ private:
+  inline bool ShouldGenerateMovprfx(const ZRegister& dst,
+                                    const ZRegister& src) {
+    VIXL_ASSERT(AreSameLaneSize(dst, src));
+    return !dst.Aliases(src);
+  }
+
+  inline bool ShouldGenerateMovprfx(const ZRegister& dst,
+                                    const PRegister& pg,
+                                    const ZRegister& src) {
+    VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing());
+    // We need to emit movprfx in two cases:
+    //  1. To give a predicated merging unary instruction zeroing predication.
+    //  2. To make destructive instructions constructive.
+    //
+    // There are no predicated zeroing instructions that can take movprfx, so we
+    // will never generate an unnecessary movprfx with this logic.
+    return pg.IsZeroing() || ShouldGenerateMovprfx(dst, src);
+  }
+};
+
+
 enum BranchType {
   // Copies of architectural conditions.
   // The associated conditions can be used in place of those, the code will
@@ -566,7 +616,19 @@ enum BranchType {
   kBranchTypeFirstCondition = eq,
   kBranchTypeLastCondition = nv,
   kBranchTypeFirstUsingReg = reg_zero,
-  kBranchTypeFirstUsingBit = reg_bit_clear
+  kBranchTypeFirstUsingBit = reg_bit_clear,
+
+  // SVE branch conditions.
+  integer_none = eq,
+  integer_any = ne,
+  integer_nlast = cs,
+  integer_last = cc,
+  integer_first = mi,
+  integer_nfrst = pl,
+  integer_pmore = hi,
+  integer_plast = ls,
+  integer_tcont = ge,
+  integer_tstop = lt
 };
@@ -587,6 +649,18 @@ enum PreShiftImmMode {
   kAnyShift  // Allow any pre-shift.
 };
 
+enum FPMacroNaNPropagationOption {
+  // The default option. This generates a run-time error in macros that respect
+  // this option.
+  NoFPMacroNaNPropagationSelected,
+  // For example, Fmin(result, NaN(a), NaN(b)) always selects NaN(a) if both
+  // NaN(a) and NaN(b) are quiet, or both are signalling, at the
+  // cost of extra code generation in some cases.
+  StrictNaNPropagation,
+  // For example, Fmin(result, NaN(a), NaN(b)) selects either NaN, but using the
+  // fewest instructions.
+  FastNaNPropagation
+};
 
 class MacroAssembler : public Assembler, public MacroAssemblerInterface {
  public:
@@ -946,6 +1020,20 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
   void Claim(const Operand& size);
   void Drop(const Operand& size);
 
+  // As above, but for multiples of the SVE vector length.
+  void ClaimVL(int64_t multiplier) {
+    // We never need to worry about sp alignment because the VL is always a
+    // multiple of 16.
+    VIXL_STATIC_ASSERT((kZRegMinSizeInBytes % 16) == 0);
+    VIXL_ASSERT(multiplier >= 0);
+    Addvl(sp, sp, -multiplier);
+  }
+  void DropVL(int64_t multiplier) {
+    VIXL_STATIC_ASSERT((kZRegMinSizeInBytes % 16) == 0);
+    VIXL_ASSERT(multiplier >= 0);
+    Addvl(sp, sp, multiplier);
+  }
+
   // Preserve the callee-saved registers (as defined by AAPCS64).
   //
   // Higher-numbered registers are pushed before lower-numbered registers, and
@@ -1489,13 +1577,8 @@
   void Fmov(const VRegister& vd, const VRegister& vn) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
-    // Only emit an instruction if vd and vn are different, and they are both D
-    // registers. fmov(s0, s0) is not a no-op because it clears the top word of
-    // d0.
Technically, fmov(d0, d0) is not a no-op either because it clears - // the top of q0, but VRegister does not currently support Q registers. - if (!vd.Is(vn) || !vd.Is64Bits()) { - fmov(vd, vn); - } + // TODO: Use DiscardMoveMode to allow this move to be elided if vd.Is(vn). + fmov(vd, vn); } void Fmov(const VRegister& vd, const Register& rn) { VIXL_ASSERT(allow_macro_instructions_); @@ -1503,12 +1586,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { SingleEmissionCheckScope guard(this); fmov(vd, rn); } - void Fmov(const VRegister& vd, const XRegister& xn) { - Fmov(vd, Register(xn)); - } - void Fmov(const VRegister& vd, const WRegister& wn) { - Fmov(vd, Register(wn)); - } void Fmov(const VRegister& vd, int index, const Register& rn) { VIXL_ASSERT(allow_macro_instructions_); SingleEmissionCheckScope guard(this); @@ -2970,6 +3047,43 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { NEON_2VREG_SHIFT_LONG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) #undef DEFINE_MACRO_ASM_FUNC +// SVE 3 vector register instructions. +#define SVE_3VREG_COMMUTATIVE_MACRO_LIST(V) \ + V(add, Add) \ + V(and_, And) \ + V(bic, Bic) \ + V(eor, Eor) \ + V(mul, Mul) \ + V(orr, Orr) \ + V(sabd, Sabd) \ + V(smax, Smax) \ + V(smulh, Smulh) \ + V(smin, Smin) \ + V(uabd, Uabd) \ + V(umax, Umax) \ + V(umin, Umin) \ + V(umulh, Umulh) + +#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const ZRegister& zd, \ + const PRegisterM& pg, \ + const ZRegister& zn, \ + const ZRegister& zm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + if (zd.Aliases(zn)) { \ + SingleEmissionCheckScope guard(this); \ + ASM(zd, pg, zd, zm); \ + } else if (zd.Aliases(zm)) { \ + SingleEmissionCheckScope guard(this); \ + ASM(zd, pg, zd, zn); \ + } else { \ + MovprfxHelperScope guard(this, zd, pg, zn); \ + ASM(zd, pg, zd, zm); \ + } \ + } + SVE_3VREG_COMMUTATIVE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) +#undef DEFINE_MACRO_ASM_FUNC + void Bic(const VRegister& vd, const int imm8, const int left_shift = 0) { VIXL_ASSERT(allow_macro_instructions_); SingleEmissionCheckScope guard(this); @@ -3357,6 +3471,2901 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { crc32cx(rd, rn, rm); } + // Scalable Vector Extensions. + void Abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + abs(zd, pg, zn); + } + void Add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + add(zd, zn, zm); + } + void Add(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + AddSubHelper(kAddImmediate, zd, zn, imm); + } + void Addpl(const Register& xd, const Register& xn, int64_t multiplier); + void Addvl(const Register& xd, const Register& xn, int64_t multiplier); + // Note that unlike the core ISA, SVE's `adr` is not PC-relative. 
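
To make the `adr` note above concrete: the SVE form computes a vector of addresses from two vector operands, lane by lane, with an optional left shift applied to the offsets; the PC is never an input. A per-lane model of the .D form, using plain arrays rather than VIXL types:

    #include <cstdint>

    // Model of `adr zd.d, [zn.d, zm.d, lsl #shift]`:
    // each result lane is base[i] + (offset[i] << shift).
    void SveAdrModel(uint64_t* zd, const uint64_t* zn, const uint64_t* zm,
                     int lane_count, int shift) {
      for (int i = 0; i < lane_count; i++) {
        zd[i] = zn[i] + (zm[i] << shift);
      }
    }
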
+ void Adr(const ZRegister& zd, const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + adr(zd, addr); + } + void And(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + and_(pd, pg, pn, pm); + } + void And(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + and_(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. + VIXL_UNIMPLEMENTED(); + } + } + void And(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + SingleEmissionCheckScope guard(this); + and_(zd.VnD(), zn.VnD(), zm.VnD()); + } + void Ands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ands(pd, pg, pn, pm); + } + void Andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + andv(vd, pg, zn); + } + void Asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + asr(zd, pg, zd, shift); + } + void Asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Asr(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + asr(zd, zn, shift); + } + void Asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + asr(zd, zn, zm); + } + void Asrd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + asrd(zd, pg, zd, shift); + } + void Bic(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bic(pd, pg, pn, pm); + } + void Bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + SingleEmissionCheckScope guard(this); + bic(zd.VnD(), zn.VnD(), zm.VnD()); + } + void Bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + bic(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. 
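
Several of the SVE macros here (And and Bic above, Eon and Eor below) are gated on `IsImmLogical(imm, lane_size)`, reaching the `VIXL_UNIMPLEMENTED()` fallback when the immediate cannot be encoded. Roughly, a logical immediate is a bit pattern that repeats with some power-of-two period, where one period is a rotation of a single contiguous run of ones (all-zeros and all-ones excluded). A simplified 64-bit checker under that characterisation, not VIXL's actual implementation:

    #include <cstdint>

    static int PopCount64(uint64_t v) {
      int n = 0;
      for (; v != 0; v &= v - 1) n++;
      return n;
    }

    // Rough model of the logical-immediate test: some period of 2..64 bits
    // must replicate across the value, and that period must be a rotation of
    // a single contiguous run of ones.
    bool LooksLikeLogicalImmediate(uint64_t imm) {
      if ((imm == 0) || (imm == ~UINT64_C(0))) return false;
      for (int size = 2; size <= 64; size *= 2) {
        uint64_t mask =
            (size == 64) ? ~UINT64_C(0) : ((UINT64_C(1) << size) - 1);
        uint64_t pattern = imm & mask;
        bool replicates = true;
        for (int i = size; i < 64; i += size) {
          if (((imm >> i) & mask) != pattern) replicates = false;
        }
        if (!replicates || (pattern == 0) || (pattern == mask)) continue;
        // A single circular run of ones has exactly two 0<->1 transitions.
        uint64_t rotated = ((pattern >> 1) | (pattern << (size - 1))) & mask;
        if (PopCount64(pattern ^ rotated) == 2) return true;
      }
      return false;
    }
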
+ VIXL_UNIMPLEMENTED(); + } + } + void Bics(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bics(pd, pg, pn, pm); + } + void Brka(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brka(pd, pg, pn); + } + void Brkas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkas(pd, pg, pn); + } + void Brkb(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkb(pd, pg, pn); + } + void Brkbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkbs(pd, pg, pn); + } + void Brkn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + if (!pd.Aliases(pm)) { + Mov(pd, pm); + } + SingleEmissionCheckScope guard(this); + brkn(pd, pg, pn, pd); + } + void Brkns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + if (!pd.Aliases(pm)) { + Mov(pd, pm); + } + SingleEmissionCheckScope guard(this); + brkns(pd, pg, pn, pd); + } + void Brkpa(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpa(pd, pg, pn, pm); + } + void Brkpas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpas(pd, pg, pn, pm); + } + void Brkpb(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpb(pd, pg, pn, pm); + } + void Brkpbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpbs(pd, pg, pn, pm); + } + void Clasta(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clasta(rd, pg, rn, zm); + } + void Clasta(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clasta(vd, pg, vn, zm); + } + void Clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + void Clastb(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clastb(rd, pg, rn, zm); + } + void Clastb(const VRegister& vd, + const PRegister& pg, 
+ const VRegister& vn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clastb(vd, pg, vn, zm); + } + void Clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + void Cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cls(zd, pg, zn); + } + void Clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clz(zd, pg, zn); + } + void Cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmpeq(pd, pg, zn, zm); + } + void Cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmpeq(pd, pg, zn, imm5); + } else { + CompareHelper(eq, pd, pg, zn, imm); + } + } + void Cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmpge(pd, pg, zn, zm); + } + void Cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmpge(pd, pg, zn, imm5); + } else { + CompareHelper(ge, pd, pg, zn, imm); + } + } + void Cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmpgt(pd, pg, zn, zm); + } + void Cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmpgt(pd, pg, zn, imm5); + } else { + CompareHelper(gt, pd, pg, zn, imm); + } + } + void Cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmphi(pd, pg, zn, zm); + } + void Cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + if (imm.IsUintN(7)) { + SingleEmissionCheckScope guard(this); + cmphi(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); + } else { + CompareHelper(hi, pd, pg, zn, imm); + } + } + void Cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmphs(pd, pg, zn, zm); + } + void Cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + if (imm.IsUintN(7)) { + SingleEmissionCheckScope guard(this); + cmphs(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); + } else { + CompareHelper(hs, pd, pg, zn, imm); + } + } + void Cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const 
ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmple(pd, pg, zn, zm); + } + void Cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmple(pd, pg, zn, imm5); + } else { + CompareHelper(le, pd, pg, zn, imm); + } + } + void Cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmplo(pd, pg, zn, zm); + } + void Cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + if (imm.IsUintN(7)) { + SingleEmissionCheckScope guard(this); + cmplo(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); + } else { + CompareHelper(lo, pd, pg, zn, imm); + } + } + void Cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmpls(pd, pg, zn, zm); + } + void Cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + if (imm.IsUintN(7)) { + SingleEmissionCheckScope guard(this); + cmpls(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); + } else { + CompareHelper(ls, pd, pg, zn, imm); + } + } + void Cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmplt(pd, pg, zn, zm); + } + void Cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmplt(pd, pg, zn, imm5); + } else { + CompareHelper(lt, pd, pg, zn, imm); + } + } + void Cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmpne(pd, pg, zn, zm); + } + void Cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmpne(pd, pg, zn, imm5); + } else { + CompareHelper(ne, pd, pg, zn, imm); + } + } + void Cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cnot(zd, pg, zn); + } + void Cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cnt(zd, pg, zn); + } + void Cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cntb(rd, pattern, multiplier); + } + void Cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cntd(rd, pattern, multiplier); + } + void Cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + 
SingleEmissionCheckScope guard(this); + cnth(rd, pattern, multiplier); + } + void Cntp(const Register& rd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + // The `cntp` instruction architecturally takes an X register, but the + // result will always be in the range [0, kPRegMaxSize] (and therefore + // always fits in a W register), so we can accept a W-sized rd here. + cntp(rd.X(), pg, pn); + } + void Cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cntw(rd, pattern, multiplier); + } + void Compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + compact(zd, pg, zn); + } + void Cpy(const ZRegister& zd, const PRegister& pg, IntegerOperand imm); + void Cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpy(zd, pg, rn); + } + void Cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpy(zd, pg, vn); + } + void Ctermeq(const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ctermeq(rn, rm); + } + void Ctermne(const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ctermne(rn, rm); + } + void Decb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decb(rdn, pattern, multiplier); + } + void Decd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decd(rdn, pattern, multiplier); + } + void Decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decd(zdn, pattern, multiplier); + } + void Dech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dech(rdn, pattern, multiplier); + } + void Dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dech(zdn, pattern, multiplier); + } + void Decp(const Register& rdn, const PRegisterWithLaneSize& pg) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decp(rdn, pg); + } + void Decp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `decp` writes every lane, so use an unpredicated movprfx. 
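
The `decp` comment above shows the MovprfxHelperScope policy (declared earlier in this diff, in macro-assembler-aarch64.h) in action: a `movprfx` is needed either to give a merging unary instruction zeroing predication, or to make a destructive instruction behave constructively, and `decp` sidesteps the predication question because it writes every lane. The decision reduces to:

    // Simplified form of MovprfxHelperScope's decision: emit `movprfx` when
    // zeroing predication must be synthesised, or when the destination does
    // not already alias the destructive operand.
    bool ShouldEmitMovprfx(bool pg_is_zeroing, bool dst_aliases_src) {
      return pg_is_zeroing || !dst_aliases_src;
    }
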
+ MovprfxHelperScope guard(this, zd, zn); + decp(zd, pg); + } + void Decp(const ZRegister& zdn, const PRegister& pg) { Decp(zdn, pg, zdn); } + void Decw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decw(rdn, pattern, multiplier); + } + void Decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decw(zdn, pattern, multiplier); + } + void Dup(const ZRegister& zd, const Register& xn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dup(zd, xn); + } + void Dup(const ZRegister& zd, const ZRegister& zn, int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dup(zd, zn, index); + } + void Dup(const ZRegister& zd, IntegerOperand imm); + void Eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + eon(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. + VIXL_UNIMPLEMENTED(); + } + } + void Eor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eor(pd, pg, pn, pm); + } + void Eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + eor(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. 
+ VIXL_UNIMPLEMENTED(); + } + } + void Eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + SingleEmissionCheckScope guard(this); + eor(zd.VnD(), zn.VnD(), zm.VnD()); + } + void Eors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eors(pd, pg, pn, pm); + } + void Eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eorv(vd, pg, zn); + } + void Ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset); + void Fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option); + void Fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fabs(zd, pg, zn); + } + void Facge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facge(pd, pg, zn, zm); + } + void Facgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facgt(pd, pg, zn, zm); + } + void Facle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facge(pd, pg, zm, zn); + } + void Faclt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facgt(pd, pg, zm, zn); + } + void Fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + fadd(zd, pg, zd, imm); + } + void Fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option); + void Fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fadd(zd, zn, zm); + } + void Fadda(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fadda(vd, pg, vn, zm); + } + void Faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + faddv(vd, pg, zn); + } + void Fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (zero == 0.0) { + fcmeq(pd, pg, zn, zero); + } else { + // TODO: Synthesise other immediates. 
+  void Fcmeq(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmeq(pd, pg, zn, zm);
+  }
+  void Fcmge(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             double zero) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (zero == 0.0) {
+      fcmge(pd, pg, zn, zero);
+    } else {
+      // TODO: Synthesise other immediates.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
+  void Fcmge(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmge(pd, pg, zn, zm);
+  }
+  void Fcmgt(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             double zero) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (zero == 0.0) {
+      fcmgt(pd, pg, zn, zero);
+    } else {
+      // TODO: Synthesise other immediates.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
+  void Fcmgt(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmgt(pd, pg, zn, zm);
+  }
+  void Fcmla(const ZRegister& zda,
+             const PRegisterM& pg,
+             const ZRegister& zn,
+             const ZRegister& zm,
+             int rot) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zda, pg, zda);
+    fcmla(zda, pg, zn, zm, rot);
+  }
+  void Fcmla(const ZRegister& zda,
+             const ZRegister& zn,
+             const ZRegister& zm,
+             int index,
+             int rot) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmla(zda, zn, zm, index, rot);
+  }
+  void Fcmle(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             double zero) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (zero == 0.0) {
+      fcmle(pd, pg, zn, zero);
+    } else {
+      // TODO: Synthesise other immediates.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
+  void Fcmle(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmge(pd, pg, zm, zn);
+  }
+  void Fcmlt(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             double zero) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (zero == 0.0) {
+      fcmlt(pd, pg, zn, zero);
+    } else {
+      // TODO: Synthesise other immediates.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
+  void Fcmlt(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmgt(pd, pg, zm, zn);
+  }
+  void Fcmne(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             double zero) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (zero == 0.0) {
+      fcmne(pd, pg, zn, zero);
+    } else {
+      // TODO: Synthesise other immediates.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
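The floating-point compare-with-immediate macros above only accept a literal 0.0, matching the sole immediate the SVE compare-to-zero encodings provide; other constants fall through to VIXL_UNIMPLEMENTED() for now. A usage sketch (illustrative only; registers are assumptions):

// Assumes the same include and using-directive as the first sketch above.
void FcmZeroSketch(MacroAssembler* masm) {
  masm->Fcmeq(p1.VnS(), p0.Zeroing(), z0.VnS(), 0.0);  // p1 = (z0 == 0.0)
  masm->Fcmgt(p2.VnS(), p0.Zeroing(), z0.VnS(), 0.0);  // p2 = (z0 > 0.0)
}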
+  void Fcmne(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmne(pd, pg, zn, zm);
+  }
+  void Fcmuo(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmuo(pd, pg, zn, zm);
+  }
+  void Fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
+  void Fcpy(const ZRegister& zd, const PRegisterM& pg, float imm);
+  void Fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm);
+  void Fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcvt(zd, pg, zn);
+  }
+  void Fcvt(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    // The element type in this predicated movprfx is determined by the larger
+    // type between the source and destination.
+    int lane_size = std::max(zd.GetLaneSizeInBits(), zn.GetLaneSizeInBits());
+    MovprfxHelperScope guard(this,
+                             zd.WithLaneSize(lane_size),
+                             pg,
+                             zn.WithLaneSize(lane_size));
+    fcvt(zd, pg.Merging(), zn);
+  }
+  void Fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcvtzs(zd, pg, zn);
+  }
+  void Fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcvtzu(zd, pg, zn);
+  }
+  void Fdiv(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            const ZRegister& zm);
+  void Fdup(const ZRegister& zd, double imm);
+  void Fdup(const ZRegister& zd, float imm);
+  void Fdup(const ZRegister& zd, Float16 imm);
+  void Fexpa(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fexpa(zd, zn);
+  }
+  void Fmad(const ZRegister& zdn,
+            const PRegisterM& pg,
+            const ZRegister& zm,
+            const ZRegister& za) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmad(zdn, pg, zm, za);
+  }
+  void Fmax(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fmax(zd, pg, zd, imm);
+  }
+  void Fmax(
+      const ZRegister& zd,
+      const PRegisterM& pg,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Fmaxnm(const ZRegister& zd,
+              const PRegisterM& pg,
+              const ZRegister& zn,
+              double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fmaxnm(zd, pg, zd, imm);
+  }
+  void Fmaxnm(const ZRegister& zd,
+              const PRegisterM& pg,
+              const ZRegister& zn,
+              const ZRegister& zm,
+              FPMacroNaNPropagationOption nan_option);
+  void Fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmaxnmv(vd, pg, zn);
+  }
+  void Fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmaxv(vd, pg, zn);
+  }
+  void Fmin(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fmin(zd, pg, zd, imm);
+  }
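The zeroing Fcvt overload above is worth a second look: the movprfx must clear whichever of the source and destination lane widths is larger, which is why the helper rewrites both operands with the wider lane size before the merging conversion. A sketch (illustrative only; the expansion shown is the conceptual one when the registers differ):

// Assumes the same include and using-directive as the first sketch above.
void FcvtZeroingSketch(MacroAssembler* masm) {
  // S-to-H conversion: S is the wider lane, so conceptually this becomes
  //   movprfx z2.s, p0/z, z1.s
  //   fcvt    z2.h, p0/m, z1.s
  masm->Fcvt(z2.VnH(), p0.Zeroing(), z1.VnS());
}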
+  void Fmin(
+      const ZRegister& zd,
+      const PRegisterM& pg,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Fminnm(const ZRegister& zd,
+              const PRegisterM& pg,
+              const ZRegister& zn,
+              double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fminnm(zd, pg, zd, imm);
+  }
+  void Fminnm(const ZRegister& zd,
+              const PRegisterM& pg,
+              const ZRegister& zn,
+              const ZRegister& zm,
+              FPMacroNaNPropagationOption nan_option);
+  void Fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fminnmv(vd, pg, zn);
+  }
+  void Fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fminv(vd, pg, zn);
+  }
+  // zd = za + (zn * zm)
+  void Fmla(
+      const ZRegister& zd,
+      const PRegisterM& pg,
+      const ZRegister& za,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Fmla(const ZRegister& zd,
+            const ZRegister& za,
+            const ZRegister& zn,
+            const ZRegister& zm,
+            int index);
+  // zd = za - (zn * zm)
+  void Fmls(
+      const ZRegister& zd,
+      const PRegisterM& pg,
+      const ZRegister& za,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Fmls(const ZRegister& zd,
+            const ZRegister& za,
+            const ZRegister& zn,
+            const ZRegister& zm,
+            int index);
+  void Fmov(const ZRegister& zd, double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fdup(zd, imm);
+  }
+  void Fmov(const ZRegister& zd, float imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fdup(zd, imm);
+  }
+  void Fmov(const ZRegister& zd, Float16 imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fdup(zd, imm);
+  }
+  void Fmov(const ZRegister& zd, const PRegisterM& pg, double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fcpy(zd, pg, imm);
+  }
+  void Fmov(const ZRegister& zd, const PRegisterM& pg, float imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fcpy(zd, pg, imm);
+  }
+  void Fmov(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fcpy(zd, pg, imm);
+  }
+  void Fmsb(const ZRegister& zdn,
+            const PRegisterM& pg,
+            const ZRegister& zm,
+            const ZRegister& za) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmsb(zdn, pg, zm, za);
+  }
+  void Fmul(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fmul(zd, pg, zd, imm);
+  }
+  void Fmul(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            const ZRegister& zm,
+            FPMacroNaNPropagationOption nan_option);
+  void Fmul(const ZRegister& zd,
+            const ZRegister& zn,
+            const ZRegister& zm,
+            unsigned index) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmul(zd, zn, zm, index);
+  }
+  void Fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmul(zd, zn, zm);
+  }
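Fmov on Z registers is pure delegation: the unpredicated forms forward to Fdup (broadcast a floating-point constant) and the predicated forms to Fcpy (copy the constant into active lanes). Illustrative usage (registers assumed):

// Assumes the same include and using-directive as the first sketch above.
void FmovSketch(MacroAssembler* masm) {
  masm->Fmov(z0.VnD(), 1.0);                // same as Fdup(z0.VnD(), 1.0)
  masm->Fmov(z1.VnS(), p0.Merging(), 0.5);  // same as Fcpy(z1.VnS(), ..., 0.5)
}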
+  void Fmulx(const ZRegister& zd,
+             const PRegisterM& pg,
+             const ZRegister& zn,
+             const ZRegister& zm,
+             FPMacroNaNPropagationOption nan_option);
+  void Fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fneg(zd, pg, zn);
+  }
+  void Fnmla(
+      const ZRegister& zda,
+      const PRegisterM& pg,
+      const ZRegister& za,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Fnmls(
+      const ZRegister& zd,
+      const PRegisterM& pg,
+      const ZRegister& za,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Frecpe(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frecpe(zd, zn);
+  }
+  void Frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frecps(zd, zn, zm);
+  }
+  void Frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frecpx(zd, pg, zn);
+  }
+  void Frecpx(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frecpx(zd, pg.Merging(), zn);
+  }
+  void Frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frinta(zd, pg, zn);
+  }
+  void Frinta(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frinta(zd, pg.Merging(), zn);
+  }
+  void Frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frinti(zd, pg, zn);
+  }
+  void Frinti(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frinti(zd, pg.Merging(), zn);
+  }
+  void Frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintm(zd, pg, zn);
+  }
+  void Frintm(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frintm(zd, pg.Merging(), zn);
+  }
+  void Frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintn(zd, pg, zn);
+  }
+  void Frintn(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frintn(zd, pg.Merging(), zn);
+  }
+  void Frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintp(zd, pg, zn);
+  }
+  void Frintp(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frintp(zd, pg.Merging(), zn);
+  }
+  void Frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintx(zd, pg, zn);
+  }
+  void Frintx(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frintx(zd, pg.Merging(), zn);
+  }
+  void Frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintz(zd, pg, zn);
+  }
+  void Frintz(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frintz(zd, pg.Merging(), zn);
+  }
+  void Frsqrte(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frsqrte(zd, zn);
+  }
+  void Frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frsqrts(zd, zn, zm);
+  }
+  void Fscale(const ZRegister& zd,
+              const PRegisterM& pg,
+              const ZRegister& zn,
+              const ZRegister& zm);
+  void Fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fsqrt(zd, pg, zn);
+  }
+  void Fsqrt(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fsqrt(zd, pg.Merging(), zn);
+  }
+  void Fsub(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fsub(zd, pg, zd, imm);
+  }
+  void Fsub(const ZRegister& zd,
+            const PRegisterM& pg,
+            double imm,
+            const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fsubr(zd, pg, zd, imm);
+  }
+  void Fsub(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            const ZRegister& zm);
+  void Fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fsub(zd, zn, zm);
+  }
+  void Ftmad(const ZRegister& zd,
+             const ZRegister& zn,
+             const ZRegister& zm,
+             int imm3);
+  void Ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ftsmul(zd, zn, zm);
+  }
+  void Ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ftssel(zd, zn, zm);
+  }
+  void Incb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incb(rdn, pattern, multiplier);
+  }
+  void Incd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incd(rdn, pattern, multiplier);
+  }
+  void Incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incd(zdn, pattern, multiplier);
+  }
+  void Inch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    inch(rdn, pattern, multiplier);
+  }
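Note the Fsub overload above that takes (pg, imm, zn): it computes imm - zn by prefixing zn into zd and then using the reversed-operand fsubr, so no scratch register is needed. A sketch (illustrative only; like fadd, the fsub/fsubr immediate encodings accept only 0.5 and 1.0):

// Assumes the same include and using-directive as the first sketch above.
void FsubReversedSketch(MacroAssembler* masm) {
  masm->Fsub(z0.VnS(), p0.Merging(), 1.0, z1.VnS());  // z0 = 1.0 - z1
}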
+  void Inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    inch(zdn, pattern, multiplier);
+  }
+  void Incp(const Register& rdn, const PRegisterWithLaneSize& pg) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incp(rdn, pg);
+  }
+  void Incp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(AreSameFormat(zd, zn));
+    // `incp` writes every lane, so use an unpredicated movprfx.
+    MovprfxHelperScope guard(this, zd, zn);
+    incp(zd, pg);
+  }
+  void Incp(const ZRegister& zdn, const PRegister& pg) { Incp(zdn, pg, zdn); }
+  void Incw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incw(rdn, pattern, multiplier);
+  }
+  void Incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incw(zdn, pattern, multiplier);
+  }
+  void Index(const ZRegister& zd, const Operand& start, const Operand& step);
+  void Insr(const ZRegister& zdn, const Register& rm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    insr(zdn, rm);
+  }
+  void Insr(const ZRegister& zdn, const VRegister& vm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    insr(zdn, vm);
+  }
+  void Insr(const ZRegister& zdn, IntegerOperand imm);
+  void Lasta(const Register& rd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lasta(rd, pg, zn);
+  }
+  void Lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lasta(vd, pg, zn);
+  }
+  void Lastb(const Register& rd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lastb(rd, pg, zn);
+  }
+  void Lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lastb(vd, pg, zn);
+  }
+  void Ld1b(const ZRegister& zt,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr);
+  void Ld1h(const ZRegister& zt,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr);
+  void Ld1w(const ZRegister& zt,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr);
+  void Ld1d(const ZRegister& zt,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr);
+  void Ld1rb(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rb,
+                              kBRegSizeInBytes);
+  }
+  void Ld1rh(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rh,
+                              kHRegSizeInBytes);
+  }
+  void Ld1rw(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rw,
+                              kSRegSizeInBytes);
+  }
+  void Ld1rd(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rd,
+                              kDRegSizeInBytes);
+  }
+  void Ld1rqb(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
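The Ld1r* wrappers above funnel through SVELoadBroadcastImmHelper, which checks the scaled immediate offset before emitting a load-and-replicate. Illustrative usage (registers and offset assumed):

// Assumes the same include and using-directive as the first sketch above.
void Ld1rSketch(MacroAssembler* masm) {
  // Load one 32-bit element from [x0 + 8] and broadcast it to every active
  // S lane; inactive lanes are zeroed.
  masm->Ld1rw(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, 8));
}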
+  void Ld1rqd(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ld1rqh(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ld1rqw(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ld1rsb(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rsb,
+                              kBRegSizeInBytes);
+  }
+  void Ld1rsh(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rsh,
+                              kHRegSizeInBytes);
+  }
+  void Ld1rsw(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rsw,
+                              kSRegSizeInBytes);
+  }
+  void Ld1sb(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr);
+  void Ld1sh(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr);
+  void Ld1sw(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr);
+  void Ld2b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld2b(zt1, zt2, pg, addr);
+  }
+  void Ld2h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld2h(zt1, zt2, pg, addr);
+  }
+  void Ld2w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld2w(zt1, zt2, pg, addr);
+  }
+  void Ld2d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld2d(zt1, zt2, pg, addr);
+  }
+  void Ld3b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld3b(zt1, zt2, zt3, pg, addr);
+  }
+  void Ld3h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld3h(zt1, zt2, zt3, pg, addr);
+  }
+  void Ld3w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld3w(zt1, zt2, zt3, pg, addr);
+  }
+  void Ld3d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld3d(zt1, zt2, zt3, pg, addr);
+  }
+  void Ld4b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld4b(zt1, zt2, zt3, zt4, pg, addr);
+  }
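The Ld2/Ld3/Ld4 families load interleaved structures and de-interleave them across consecutive Z registers. For example (illustrative only; the pointer and registers are assumptions):

// Assumes the same include and using-directive as the first sketch above.
void Ld4Sketch(MacroAssembler* masm) {
  // For an array of 4-byte structs at x0: z0 receives member 0 of every
  // struct, z1 member 1, and so on.
  masm->Ld4b(z0.VnB(), z1.VnB(), z2.VnB(), z3.VnB(), p0.Zeroing(),
             SVEMemOperand(x0));
}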
+  void Ld4h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld4h(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void Ld4w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld4w(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void Ld4d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld4d(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void Ldff1b(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldff1h(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldff1w(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldff1d(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldff1sb(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr);
+  void Ldff1sh(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr);
+  void Ldff1sw(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr);
+  void Ldff1b(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const Register& xn,
+              const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1b(zt, pg, xn, zm);
+  }
+  void Ldff1b(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const ZRegister& zn,
+              int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1b(zt, pg, zn, imm5);
+  }
+  void Ldff1d(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const Register& xn,
+              const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1d(zt, pg, xn, zm);
+  }
+  void Ldff1d(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const ZRegister& zn,
+              int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1d(zt, pg, zn, imm5);
+  }
+  void Ldff1h(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const Register& xn,
+              const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1h(zt, pg, xn, zm);
+  }
+  void Ldff1h(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const ZRegister& zn,
+              int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1h(zt, pg, zn, imm5);
+  }
+  void Ldff1sb(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const Register& xn,
+               const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sb(zt, pg, xn, zm);
+  }
+  void Ldff1sb(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const ZRegister& zn,
+               int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sb(zt, pg, zn, imm5);
+  }
+  void Ldff1sh(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const Register& xn,
+               const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sh(zt, pg, xn, zm);
+  }
+  void Ldff1sh(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const ZRegister& zn,
+               int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sh(zt, pg, zn, imm5);
+  }
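The Ldff1* gather forms above are first-fault loads: only the lowest active lane may raise a fault, and lanes past the first failure are recorded in FFR instead of trapping. Paired with Setffr and Rdffr (defined further down in this class), the usual pattern looks roughly like this (illustrative only; registers and addressing are assumptions):

// Assumes the same include and using-directive as the first sketch above.
void LdffSketch(MacroAssembler* masm) {
  masm->Setffr();                                      // mark all lanes good
  masm->Ldff1w(z0.VnS(), p0.Zeroing(), x0, z1.VnS());  // first-fault gather
  masm->Rdffr(p1.VnB(), p0.Zeroing());                 // p1 = lanes that loaded
}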
+  void Ldff1sw(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const Register& xn,
+               const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sw(zt, pg, xn, zm);
+  }
+  void Ldff1sw(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const ZRegister& zn,
+               int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sw(zt, pg, zn, imm5);
+  }
+  void Ldff1w(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const Register& xn,
+              const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1w(zt, pg, xn, zm);
+  }
+  void Ldff1w(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const ZRegister& zn,
+              int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1w(zt, pg, zn, imm5);
+  }
+  void Ldnf1b(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1b(zt, pg, addr);
+  }
+  void Ldnf1d(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1d(zt, pg, addr);
+  }
+  void Ldnf1h(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1h(zt, pg, addr);
+  }
+  void Ldnf1sb(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1sb(zt, pg, addr);
+  }
+  void Ldnf1sh(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1sh(zt, pg, addr);
+  }
+  void Ldnf1sw(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1sw(zt, pg, addr);
+  }
+  void Ldnf1w(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1w(zt, pg, addr);
+  }
+  void Ldnt1b(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldnt1d(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldnt1h(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldnt1w(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldr(const CPURegister& rt, const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadStoreScalarImmHelper(rt, addr, &MacroAssembler::ldr);
+  }
+  void Lsl(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& zn,
+           int shift) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    lsl(zd, pg, zd, shift);
+  }
+  void Lsl(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& zn,
+           const ZRegister& zm);
+  void Lsl(const ZRegister& zd, const ZRegister& zn, int shift) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lsl(zd, zn, shift);
+  }
+  void Lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lsl(zd, zn, zm);
+  }
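As with the arithmetic macros, the immediate shift wrappers above hide the destructive predicated encodings behind movprfx, so a three-operand form is available either way. Illustrative usage (registers and shift amounts assumed):

// Assumes the same include and using-directive as the first sketch above.
void ShiftSketch(MacroAssembler* masm) {
  masm->Lsl(z0.VnH(), p0.Merging(), z1.VnH(), 3);  // z0 = z1 << 3, active lanes
  masm->Lsr(z2.VnH(), z1.VnH(), 4);                // unpredicated form
}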
+  void Lsr(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& zn,
+           int shift) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    lsr(zd, pg, zd, shift);
+  }
+  void Lsr(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& zn,
+           const ZRegister& zm);
+  void Lsr(const ZRegister& zd, const ZRegister& zn, int shift) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lsr(zd, zn, shift);
+  }
+  void Lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lsr(zd, zn, zm);
+  }
+  void Mov(const PRegister& pd, const PRegister& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(pd.VnB(), pn.VnB());
+  }
+  void Mov(const PRegisterWithLaneSize& pd,
+           const PRegisterM& pg,
+           const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(pd, pg, pn);
+  }
+  void Mov(const PRegisterWithLaneSize& pd,
+           const PRegisterZ& pg,
+           const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(pd, pg, pn);
+  }
+  void Mov(const ZRegister& zd, const Register& xn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, xn);
+  }
+
+  void Mov(const ZRegister& zd, const VRegister& vn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, vn);
+  }
+
+  void Mov(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, zn);
+  }
+  void Mov(const ZRegister& zd, const ZRegister& zn, unsigned index) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, zn, index);
+  }
+  void Mov(const ZRegister& zd, const PRegister& pg, IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Cpy(zd, pg, imm);
+  }
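Mov is overloaded across predicate copies, scalar and lane broadcasts, and plain vector moves; the IntegerOperand form below defers to Dup, and the predicated IntegerOperand form above to Cpy. Illustrative usage (registers assumed):

// Assumes the same include and using-directive as the first sketch above.
void MovSketch(MacroAssembler* masm) {
  masm->Mov(p1, p0);                 // whole-predicate copy (B lanes)
  masm->Mov(z0.VnD(), x0);           // broadcast an X register to all D lanes
  masm->Mov(z1.VnS(), z2.VnS(), 3);  // broadcast lane 3 of z2
}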
+  // TODO: support zeroing predicated moves using movprfx.
+  void Mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, pg, rn);
+  }
+  void Mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, pg, vn);
+  }
+  void Mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, pg, zn);
+  }
+  void Mov(const ZRegister& zd, IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Dup(zd, imm);
+  }
+  void Movs(const PRegister& pd, const PRegister& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    movs(pd, pn);
+  }
+  void Movs(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    movs(pd, pg, pn);
+  }
+  // zd = za + (zn * zm)
+  void Mla(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& za,
+           const ZRegister& zn,
+           const ZRegister& zm);
+  // zd = za - (zn * zm)
+  void Mls(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& za,
+           const ZRegister& zn,
+           const ZRegister& zm);
+  void Mul(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm);
+  void Nand(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn,
+            const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    nand(pd, pg, pn, pm);
+  }
+  void Nands(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const PRegisterWithLaneSize& pn,
+             const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    nands(pd, pg, pn, pm);
+  }
+  // There is no instruction with this form, but we can implement it using
+  // `subr`.
+  void Neg(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, zn);
+    subr(zd, zd, 0);
+  }
+  void Neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    neg(zd, pg, zn);
+  }
+  void Nor(const PRegisterWithLaneSize& pd,
+           const PRegisterZ& pg,
+           const PRegisterWithLaneSize& pn,
+           const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    nor(pd, pg, pn, pm);
+  }
+  void Nors(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn,
+            const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    nors(pd, pg, pn, pm);
+  }
+  void Not(const PRegisterWithLaneSize& pd,
+           const PRegisterZ& pg,
+           const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    not_(pd, pg, pn);
+  }
+  void Not(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    not_(zd, pg, zn);
+  }
+  void Nots(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    nots(pd, pg, pn);
+  }
+  void Orn(const PRegisterWithLaneSize& pd,
+           const PRegisterZ& pg,
+           const PRegisterWithLaneSize& pn,
+           const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    orn(pd, pg, pn, pm);
+  }
+  void Orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (IsImmLogical(imm, zd.GetLaneSizeInBits())) {
+      orn(zd, zn, imm);
+    } else {
+      // TODO: Synthesise the immediate once 'Mov' is implemented.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
+  void Orns(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn,
+            const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    orns(pd, pg, pn, pm);
+  }
+  void Orr(const PRegisterWithLaneSize& pd,
+           const PRegisterZ& pg,
+           const PRegisterWithLaneSize& pn,
+           const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    orr(pd, pg, pn, pm);
+  }
+  void Orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (IsImmLogical(imm, zd.GetLaneSizeInBits())) {
+      orr(zd, zn, imm);
+    } else {
+      // TODO: Synthesise the immediate once 'Mov' is implemented.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
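The unpredicated Neg above is a neat instance of the `subr` trick its comment describes: movprfx copies zn into zd, then `subr zd, zd, #0` computes 0 - zd, giving an element-wise negation with no predicate or zero register required. Illustrative usage (registers assumed):

// Assumes the same include and using-directive as the first sketch above.
void NegSketch(MacroAssembler* masm) {
  masm->Neg(z0.VnS(), z1.VnS());  // z0 = -z1 in every lane
}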
+  void Orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+    SingleEmissionCheckScope guard(this);
+    orr(zd.VnD(), zn.VnD(), zm.VnD());
+  }
+  void Orrs(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn,
+            const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    orrs(pd, pg, pn, pm);
+  }
+  void Orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    orv(vd, pg, zn);
+  }
+  void Pfalse(const PRegister& pd) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(pd.IsUnqualified());
+    SingleEmissionCheckScope guard(this);
+    // No matter what the lane size is, overall this operation just writes
+    // zeros throughout the register.
+    pfalse(pd.VnB());
+  }
+  void Pfirst(const PRegisterWithLaneSize& pd,
+              const PRegister& pg,
+              const PRegisterWithLaneSize& pn);
+  void Pnext(const PRegisterWithLaneSize& pd,
+             const PRegister& pg,
+             const PRegisterWithLaneSize& pn);
+  void Prfb(PrefetchOperation prfop,
+            const PRegister& pg,
+            const SVEMemOperand addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    prfb(prfop, pg, addr);
+  }
+  void Prfh(PrefetchOperation prfop,
+            const PRegister& pg,
+            const SVEMemOperand addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    prfh(prfop, pg, addr);
+  }
+  void Prfw(PrefetchOperation prfop,
+            const PRegister& pg,
+            const SVEMemOperand addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    prfw(prfop, pg, addr);
+  }
+  void Prfd(PrefetchOperation prfop,
+            const PRegister& pg,
+            const SVEMemOperand addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    prfd(prfop, pg, addr);
+  }
+  void Ptest(const PRegister& pg, const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ptest(pg, pn);
+  }
+  void Ptrue(const PRegisterWithLaneSize& pd,
+             SVEPredicateConstraint pattern,
+             FlagsUpdate s);
+  void Ptrue(const PRegisterWithLaneSize& pd,
+             SVEPredicateConstraint pattern = SVE_ALL) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ptrue(pd, pattern);
+  }
+  void Ptrues(const PRegisterWithLaneSize& pd,
+              SVEPredicateConstraint pattern = SVE_ALL) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ptrues(pd, pattern);
+  }
+  void Punpkhi(const PRegisterWithLaneSize& pd,
+               const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    punpkhi(pd, pn);
+  }
+  void Punpklo(const PRegisterWithLaneSize& pd,
+               const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    punpklo(pd, pn);
+  }
+  void Rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    rbit(zd, pg, zn);
+  }
+  void Rdffr(const PRegister& pd) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    // Although this is essentially just a move, it writes every bit and so
+    // can only support b-sized lanes, because other lane sizes would
+    // implicitly clear bits in `pd`.
+    VIXL_ASSERT(!pd.HasLaneSize() || pd.IsLaneSizeB());
+    VIXL_ASSERT(pd.IsUnqualified());
+    SingleEmissionCheckScope guard(this);
+    rdffr(pd.VnB());
+  }
+  void Rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    rdffr(pd, pg);
+  }
+  void Rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    rdffrs(pd, pg);
+  }
+  // Note that there is no `rdpl` instruction, but this macro emulates it
+  // (for symmetry with `Rdvl`).
+  void Rdpl(const Register& xd, int64_t multiplier) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Addpl(xd, xzr, multiplier);
+  }
+  void Rdvl(const Register& xd, int64_t multiplier) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Addvl(xd, xzr, multiplier);
+  }
+  void Rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    rev(pd, pn);
+  }
+  void Rev(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    rev(zd, zn);
+  }
+  void Revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    revb(zd, pg, zn);
+  }
+  void Revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    revh(zd, pg, zn);
+  }
+  void Revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    revw(zd, pg, zn);
+  }
+  void Saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    saddv(dd, pg, zn);
+  }
+  void Scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    scvtf(zd, pg, zn);
+  }
+  void Sdiv(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            const ZRegister& zm);
+  void Sdot(const ZRegister& zd,
+            const ZRegister& za,
+            const ZRegister& zn,
+            const ZRegister& zm);
+  void Sdot(const ZRegister& zd,
+            const ZRegister& za,
+            const ZRegister& zn,
+            const ZRegister& zm,
+            int index);
+  void Sel(const PRegisterWithLaneSize& pd,
+           const PRegister& pg,
+           const PRegisterWithLaneSize& pn,
+           const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sel(pd, pg, pn, pm);
+  }
+  void Sel(const ZRegister& zd,
+           const PRegister& pg,
+           const ZRegister& zn,
+           const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sel(zd, pg, zn, zm);
+  }
+  void Setffr() {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setffr();
+  }
+  void Smax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm);
+  void Smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    smaxv(vd, pg, zn);
+  }
+  void Smin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm);
+  void Sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sminv(vd, pg, zn);
+  }
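Rdvl reads multiples of the vector length in bytes; since there is no `rdpl` instruction, the Rdpl macro above synthesises the predicate-length equivalent with Addpl on xzr. Illustrative usage (registers assumed):

// Assumes the same include and using-directive as the first sketch above.
void RdSketch(MacroAssembler* masm) {
  masm->Rdvl(x0, 1);  // x0 = vector length in bytes
  masm->Rdpl(x1, 1);  // x1 = predicate length in bytes (vector length / 8)
}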
+  void Splice(const ZRegister& zd,
+              const PRegister& pg,
+              const ZRegister& zn,
+              const ZRegister& zm);
+  void Sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqadd(zd, zn, zm);
+  }
+  void Sqadd(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(imm.IsUint8() ||
+                (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0)));
+    MovprfxHelperScope guard(this, zd, zn);
+    sqadd(zd, zd, imm.AsUint16());
+  }
+  void Sqdecb(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecb(xd, wn, pattern, multiplier);
+  }
+  void Sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecb(rdn, pattern, multiplier);
+  }
+  void Sqdecd(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecd(xd, wn, pattern, multiplier);
+  }
+  void Sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecd(rdn, pattern, multiplier);
+  }
+  void Sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecd(zdn, pattern, multiplier);
+  }
+  void Sqdech(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdech(xd, wn, pattern, multiplier);
+  }
+  void Sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdech(rdn, pattern, multiplier);
+  }
+  void Sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdech(zdn, pattern, multiplier);
+  }
+  void Sqdecp(const Register& xdn,
+              const PRegisterWithLaneSize& pg,
+              const Register& wdn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecp(xdn, pg, wdn);
+  }
+  void Sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecp(xdn, pg);
+  }
+  void Sqdecp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(AreSameFormat(zd, zn));
+    // `sqdecp` writes every lane, so use an unpredicated movprfx.
+    MovprfxHelperScope guard(this, zd, zn);
+    sqdecp(zd, pg);
+  }
+  void Sqdecp(const ZRegister& zdn, const PRegister& pg) {
+    Sqdecp(zdn, pg, zdn);
+  }
+  void Sqdecw(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecw(xd, wn, pattern, multiplier);
+  }
+  void Sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecw(rdn, pattern, multiplier);
+  }
+  void Sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecw(zdn, pattern, multiplier);
+  }
+  void Sqincb(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincb(xd, wn, pattern, multiplier);
+  }
+  void Sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincb(rdn, pattern, multiplier);
+  }
+  void Sqincd(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincd(xd, wn, pattern, multiplier);
+  }
+  void Sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincd(rdn, pattern, multiplier);
+  }
+  void Sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincd(zdn, pattern, multiplier);
+  }
+  void Sqinch(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqinch(xd, wn, pattern, multiplier);
+  }
+  void Sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqinch(rdn, pattern, multiplier);
+  }
+  void Sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqinch(zdn, pattern, multiplier);
+  }
+  void Sqincp(const Register& xdn,
+              const PRegisterWithLaneSize& pg,
+              const Register& wdn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincp(xdn, pg, wdn);
+  }
+  void Sqincp(const Register& xdn, const PRegisterWithLaneSize& pg) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincp(xdn, pg);
+  }
+  void Sqincp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(AreSameFormat(zd, zn));
+    // `sqincp` writes every lane, so use an unpredicated movprfx.
+    MovprfxHelperScope guard(this, zd, zn);
+    sqincp(zd, pg);
+  }
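The Sqdecp/Sqincp wrappers adjust a scalar by the number of active predicate lanes, saturating rather than wrapping; the three-operand form saturates at 32 bits and sign-extends the result into the X register. Illustrative usage (registers assumed):

// Assumes the same include and using-directive as the first sketch above.
void SqdecpSketch(MacroAssembler* masm) {
  masm->Sqdecp(x0, p0.VnD(), w0);  // x0 = 32-bit saturated (w0 - #active lanes)
  masm->Sqdecp(x1, p0.VnD());      // 64-bit saturating decrement
}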
+  void Sqincp(const ZRegister& zdn, const PRegister& pg) {
+    Sqincp(zdn, pg, zdn);
+  }
+  void Sqincw(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincw(xd, wn, pattern, multiplier);
+  }
+  void Sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincw(rdn, pattern, multiplier);
+  }
+  void Sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincw(zdn, pattern, multiplier);
+  }
+  void Sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqsub(zd, zn, zm);
+  }
+  void Sqsub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(imm.IsUint8() ||
+                (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0)));
+    MovprfxHelperScope guard(this, zd, zn);
+    sqsub(zd, zd, imm.AsUint16());
+  }
+  void St1b(const ZRegister& zt,
+            const PRegister& pg,
+            const SVEMemOperand& addr);
+  void St1h(const ZRegister& zt,
+            const PRegister& pg,
+            const SVEMemOperand& addr);
+  void St1w(const ZRegister& zt,
+            const PRegister& pg,
+            const SVEMemOperand& addr);
+  void St1d(const ZRegister& zt,
+            const PRegister& pg,
+            const SVEMemOperand& addr);
+  void St2b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st2b(zt1, zt2, pg, addr);
+  }
+  void St2h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st2h(zt1, zt2, pg, addr);
+  }
+  void St2w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st2w(zt1, zt2, pg, addr);
+  }
+  void St2d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st2d(zt1, zt2, pg, addr);
+  }
+  void St3b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st3b(zt1, zt2, zt3, pg, addr);
+  }
+  void St3h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st3h(zt1, zt2, zt3, pg, addr);
+  }
+  void St3w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st3w(zt1, zt2, zt3, pg, addr);
+  }
+  void St3d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st3d(zt1, zt2, zt3, pg, addr);
+  }
+  void St4b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st4b(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void St4h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st4h(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void St4w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st4w(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void St4d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st4d(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void Stnt1b(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr);
+  void Stnt1d(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr);
+  void Stnt1h(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr);
+  void Stnt1w(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr);
+  void Str(const CPURegister& rt, const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadStoreScalarImmHelper(rt, addr, &MacroAssembler::str);
+  }
+  void Sub(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& zn,
+           const ZRegister& zm);
+  void Sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sub(zd, zn, zm);
+  }
+  void Sub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    AddSubHelper(kSubImmediate, zd, zn, imm);
+  }
+  void Sub(const ZRegister& zd, IntegerOperand imm, const ZRegister& zm);
+  void Sunpkhi(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sunpkhi(zd, zn);
+  }
+  void Sunpklo(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sunpklo(zd, zn);
+  }
+  void Sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sxtb(zd, pg, zn);
+  }
+  void Sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sxth(zd, pg, zn);
+  }
+  void Sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sxtw(zd, pg, zn);
+  }
+  void Tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    tbl(zd, zn, zm);
+  }
+  void Trn1(const PRegisterWithLaneSize& pd,
+            const PRegisterWithLaneSize& pn,
+            const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    trn1(pd, pn, pm);
+  }
VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + trn1(zd, zn, zm); + } + void Trn2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + trn2(pd, pn, pm); + } + void Trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + trn2(zd, zn, zm); + } + void Uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uaddv(dd, pg, zn); + } + void Ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ucvtf(zd, pg, zn); + } + void Udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Udot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Udot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); + void Umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umaxv(vd, pg, zn); + } + void Umin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); + void Uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uminv(vd, pg, zn); + } + void Uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqadd(zd, zn, zm); + } + void Uqadd(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.IsUint8() || + (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); + MovprfxHelperScope guard(this, zd, zn); + uqadd(zd, zd, imm.AsUint16()); + } + void Uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecb(rdn, pattern, multiplier); + } + void Uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecd(rdn, pattern, multiplier); + } + void Uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecd(zdn, pattern, multiplier); + } + void Uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdech(rdn, pattern, multiplier); + } + void Uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdech(zdn, pattern, multiplier); + } + // The saturation is based on the size of `rn`. The result is zero-extended + // into `rd`, which must be at least as big. 
+ void Uqdecp(const Register& rd, + const PRegisterWithLaneSize& pg, + const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(rd.Aliases(rn)); + VIXL_ASSERT(rd.GetSizeInBytes() >= rn.GetSizeInBytes()); + SingleEmissionCheckScope guard(this); + if (rn.Is64Bits()) { + uqdecp(rd, pg); + } else { + // Convert <Xd> into <Wd>, to make this more consistent with Sqdecp. + uqdecp(rd.W(), pg); + } + } + void Uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg) { + Uqdecp(rdn, pg, rdn); + } + void Uqdecp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `uqdecp` writes every lane, so use an unpredicated movprfx. + MovprfxHelperScope guard(this, zd, zn); + uqdecp(zd, pg); + } + void Uqdecp(const ZRegister& zdn, const PRegister& pg) { + Uqdecp(zdn, pg, zdn); + } + void Uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecw(rdn, pattern, multiplier); + } + void Uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecw(zdn, pattern, multiplier); + } + void Uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincb(rdn, pattern, multiplier); + } + void Uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincd(rdn, pattern, multiplier); + } + void Uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincd(zdn, pattern, multiplier); + } + void Uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqinch(rdn, pattern, multiplier); + } + void Uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqinch(zdn, pattern, multiplier); + } + // The saturation is based on the size of `rn`. The result is zero-extended + // into `rd`, which must be at least as big. + void Uqincp(const Register& rd, + const PRegisterWithLaneSize& pg, + const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(rd.Aliases(rn)); + VIXL_ASSERT(rd.GetSizeInBytes() >= rn.GetSizeInBytes()); + SingleEmissionCheckScope guard(this); + if (rn.Is64Bits()) { + uqincp(rd, pg); + } else { + // Convert <Xd> into <Wd>, to make this more consistent with Sqincp. + uqincp(rd.W(), pg); + } + } + void Uqincp(const Register& rdn, const PRegisterWithLaneSize& pg) { + Uqincp(rdn, pg, rdn); + }
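The saturating-counter semantics described above are easiest to see with a concrete use. A minimal, hypothetical sketch follows; the element count, the use of p0, and the helper name are illustrative assumptions, not part of this patch.

#include "aarch64/macro-assembler-aarch64.h"

using namespace vixl::aarch64;

// Track a 32-bit element count and decrement it, with saturation, by the
// number of active lanes in p0. Because `rn` is the W-sized view, saturation
// is applied at 32 bits (the unsigned count floors at zero), and the result
// is zero-extended into the X register, as the comment above describes.
void DecrementLoopCount(MacroAssembler* masm) {
  UseScratchRegisterScope temps(masm);
  Register count = temps.AcquireX();
  masm->Mov(count.W(), 42);  // Assumed initial element count.
  masm->Uqdecp(count, p0.VnB(), count.W());
}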
+ void Uqincp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `uqincp` writes every lane, so use an unpredicated movprfx. + MovprfxHelperScope guard(this, zd, zn); + uqincp(zd, pg); + } + void Uqincp(const ZRegister& zdn, const PRegister& pg) { + Uqincp(zdn, pg, zdn); + } + void Uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincw(rdn, pattern, multiplier); + } + void Uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincw(zdn, pattern, multiplier); + } + void Uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqsub(zd, zn, zm); + } + void Uqsub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.IsUint8() || + (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); + MovprfxHelperScope guard(this, zd, zn); + uqsub(zd, zd, imm.AsUint16()); + } + void Uunpkhi(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uunpkhi(zd, zn); + } + void Uunpklo(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uunpklo(zd, zn); + } + void Uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uxtb(zd, pg, zn); + } + void Uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uxth(zd, pg, zn); + } + void Uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uxtw(zd, pg, zn); + } + void Uzp1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp1(pd, pn, pm); + } + void Uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp1(zd, zn, zm); + } + void Uzp2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp2(pd, pn, pm); + } + void Uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp2(zd, zn, zm); + } + void Whilele(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilele(pd, rn, rm); + } + void Whilelo(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilelo(pd, rn, rm); + } + void Whilels(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilels(pd, rn, rm); + } + void Whilelt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilelt(pd, rn, rm); + } + void Wrffr(const
PRegister& pn) { + VIXL_ASSERT(allow_macro_instructions_); + // Although this is essentially just a move, it writes every bit and so can + // only support b-sized lane because other lane sizes would implicitly clear + // bits in `ffr`. + VIXL_ASSERT(!pn.HasLaneSize() || pn.IsLaneSizeB()); + VIXL_ASSERT(pn.IsUnqualified()); + SingleEmissionCheckScope guard(this); + wrffr(pn.VnB()); + } + void Zip1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip1(pd, pn, pm); + } + void Zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip1(zd, zn, zm); + } + void Zip2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip2(pd, pn, pm); + } + void Zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip2(zd, zn, zm); + } + template <typename T> Literal<T>* CreateLiteralDestroyedWithPool(T value) { return new Literal<T>(value, @@ -3480,11 +6489,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { return GetScratchRegisterList(); } - CPURegList* GetScratchVRegisterList() { return &fptmp_list_; } + CPURegList* GetScratchVRegisterList() { return &v_tmp_list_; } VIXL_DEPRECATED("GetScratchVRegisterList", CPURegList* FPTmpList()) { return GetScratchVRegisterList(); } + CPURegList* GetScratchPRegisterList() { return &p_tmp_list_; } + // Get or set the current (most-deeply-nested) UseScratchRegisterScope. void SetCurrentScratchRegisterScope(UseScratchRegisterScope* scope) { current_scratch_scope_ = scope; @@ -3548,16 +6559,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { // Will output the flags. void Log(TraceParameters parameters); - // Enable or disable instrumentation when an Instrument visitor is attached to - // the simulator. - void EnableInstrumentation(); - void DisableInstrumentation(); - - // Add a marker to the instrumentation data produced by an Instrument visitor. - // The name is a two character string that will be attached to the marker in - // the output data. - void AnnotateInstrumentation(const char* marker_name); - // Enable or disable CPU features dynamically. This mechanism allows users to // strictly check the use of CPU features in different regions of code. void SetSimulatorCPUFeatures(const CPUFeatures& features); @@ -3661,6 +6662,36 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { Condition cond, bool* should_synthesise_left); + // Generate code to calculate the address represented by `addr` and write it + // into `xd`. This is used as a common fall-back for out-of-range load and + // store operands. + // + // The vl_divisor_log2 argument is used to scale the VL, for use with + // SVE_MUL_VL. + void CalculateSVEAddress(const Register& xd, + const SVEMemOperand& addr, + int vl_divisor_log2 = 0); + + void CalculateSVEAddress(const Register& xd, + const SVEMemOperand& addr, + const CPURegister& rt) { + VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister()); + int vl_divisor_log2 = rt.IsPRegister() ? 
kZRegBitsPerPRegBitLog2 : 0; + CalculateSVEAddress(xd, addr, vl_divisor_log2); + } + + void SetFPNaNPropagationOption(FPMacroNaNPropagationOption nan_option) { + fp_nan_propagation_ = nan_option; + } + + void ResolveFPNaNPropagationOption(FPMacroNaNPropagationOption* nan_option) { + // The input option has priority over the option that has been set. + if (*nan_option == NoFPMacroNaNPropagationSelected) { + *nan_option = fp_nan_propagation_; + } + VIXL_ASSERT(*nan_option != NoFPMacroNaNPropagationSelected); + } + private: // The actual Push and Pop implementations. These don't generate any code // other than that required for the push or pop. This allows @@ -3714,6 +6745,183 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { void ConfigureSimulatorCPUFeaturesHelper(const CPUFeatures& features, DebugHltOpcode action); + void CompareHelper(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm); + + // E.g. Ld1rb. + typedef void (Assembler::*SVELoadBroadcastFn)(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + void SVELoadBroadcastImmHelper(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + SVELoadBroadcastFn fn, + int divisor); + + // E.g. ldr/str + typedef void (Assembler::*SVELoadStoreFn)(const CPURegister& rt, + const SVEMemOperand& addr); + + void SVELoadStoreScalarImmHelper(const CPURegister& rt, + const SVEMemOperand& addr, + SVELoadStoreFn fn); + + typedef void (Assembler::*SVELoad1Fn)(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + typedef void (Assembler::*SVEStore1Fn)(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Helper for predicated Z register loads with addressing modes not directly + // encodable in the instruction. The supported_modifier parameter indicates + // which offset modifier the calling instruction encoder supports (e.g. + // SVE_MUL_VL). The log2 of the ratio of VL to memory access size is passed as + // vl_divisor_log2; pass -1 to indicate no dependency.
+ template <typename Tg, typename Tf> + void SVELoadStoreScalarImmHelper( + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn, + int imm_bits, + int shift_amount, + SVEOffsetModifier supported_modifier = NO_SVE_OFFSET_MODIFIER, + int vl_divisor_log2 = 0); + + template <typename Tg, typename Tf> + void SVELoadStore1Helper(int msize_in_bytes_log2, + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn); + + template <typename Tf> + void SVELoadFFHelper(int msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + Tf fn); + + typedef void (MacroAssembler::*IntWideImmMacroFn)(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm); + + typedef void (Assembler::*IntWideImmShiftFn)(const ZRegister& zd, + const ZRegister& zn, + int imm, + int shift); + + typedef void (Assembler::*IntArithFn)(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm); + + typedef void (Assembler::*IntWideImmFn)(const ZRegister& zd, + const ZRegister& zn, + int imm); + + typedef void (Assembler::*IntArithIndexFn)(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + typedef void (MacroAssembler::*SVEArithPredicatedFn)(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + void IntWideImmHelper(IntWideImmFn imm_fn, + SVEArithPredicatedFn reg_fn, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm, + bool is_signed_imm); + + enum AddSubHelperOption { kAddImmediate, kSubImmediate }; + + void AddSubHelper(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm); + + // Try to emit an add- or sub-like instruction (imm_fn) with `imm`, or the + // corresponding sub- or add-like instruction (n_imm_fn) with a negated `imm`. + // A `movprfx` is automatically generated if one is required. If successful, + // return true. Otherwise, return false. + // + // This helper uses two's complement equivalences, for example treating 0xffff + // as -1 for H-sized lanes. + bool TrySingleAddSub(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm); + + void SVESdotUdotHelper(IntArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + + void SVESdotUdotIndexHelper(IntArithIndexFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // For noncommutative arithmetic operations. + void NoncommutativeArithmeticHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + SVEArithPredicatedFn rev_fn); + + void FPCommutativeArithmeticHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + FPMacroNaNPropagationOption nan_option); + + // Floating-point fused multiply-add vectors (predicated), writing addend. + typedef void (Assembler::*SVEMulAddPredicatedZdaFn)(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point fused multiply-add vectors (predicated), writing + // multiplicand. 
+ typedef void (Assembler::*SVEMulAddPredicatedZdnFn)(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + void FPMulAddHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + SVEMulAddPredicatedZdaFn fn_zda, + SVEMulAddPredicatedZdnFn fn_zdn, + FPMacroNaNPropagationOption nan_option); + + typedef void (Assembler::*SVEMulAddIndexFn)(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + void FPMulAddIndexHelper(SVEMulAddIndexFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + // Tell whether any of the macro instruction can be used. When false the // MacroAssembler will assert if a method which can emit a variable number // of instructions is called. @@ -3727,7 +6935,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { // Scratch registers available for use by the MacroAssembler. CPURegList tmp_list_; - CPURegList fptmp_list_; + CPURegList v_tmp_list_; + CPURegList p_tmp_list_; UseScratchRegisterScope* current_scratch_scope_; @@ -3737,6 +6946,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { ptrdiff_t checkpoint_; ptrdiff_t recommended_checkpoint_; + FPMacroNaNPropagationOption fp_nan_propagation_; + friend class Pool; friend class LiteralPool; }; @@ -3805,11 +7016,35 @@ class BlockPoolsScope { MacroAssembler* masm_; }; +MovprfxHelperScope::MovprfxHelperScope(MacroAssembler* masm, + const ZRegister& dst, + const ZRegister& src) + : ExactAssemblyScope(masm, + ShouldGenerateMovprfx(dst, src) + ? (2 * kInstructionSize) + : kInstructionSize) { + if (ShouldGenerateMovprfx(dst, src)) { + masm->movprfx(dst, src); + } +} + +MovprfxHelperScope::MovprfxHelperScope(MacroAssembler* masm, + const ZRegister& dst, + const PRegister& pg, + const ZRegister& src) + : ExactAssemblyScope(masm, + ShouldGenerateMovprfx(dst, pg, src) + ? (2 * kInstructionSize) + : kInstructionSize) { + if (ShouldGenerateMovprfx(dst, pg, src)) { + masm->movprfx(dst, pg, src); + } +} // This scope utility allows scratch registers to be managed safely. The -// MacroAssembler's GetScratchRegisterList() (and GetScratchVRegisterList()) is -// used as a pool of scratch registers. These registers can be allocated on -// demand, and will be returned at the end of the scope. +// MacroAssembler's GetScratch*RegisterList() are used as a pool of scratch +// registers. These registers can be allocated on demand, and will be returned +// at the end of the scope. // // When the scope ends, the MacroAssembler's lists will be restored to their // original state, even if the lists were modified by some other means. @@ -3819,14 +7054,22 @@ class UseScratchRegisterScope { // must not be `NULL`), so it is ready to use immediately after it has been // constructed. explicit UseScratchRegisterScope(MacroAssembler* masm) - : masm_(NULL), parent_(NULL), old_available_(0), old_available_v_(0) { + : masm_(NULL), + parent_(NULL), + old_available_(0), + old_available_v_(0), + old_available_p_(0) { Open(masm); } // This constructor does not implicitly initialise the scope. Instead, the // user is required to explicitly call the `Open` function before using the // scope. 
UseScratchRegisterScope() - : masm_(NULL), parent_(NULL), old_available_(0), old_available_v_(0) {} + : masm_(NULL), + parent_(NULL), + old_available_(0), + old_available_v_(0), + old_available_p_(0) {} // This function performs the actual initialisation work. void Open(MacroAssembler* masm); @@ -3841,25 +7084,42 @@ class UseScratchRegisterScope { bool IsAvailable(const CPURegister& reg) const; - // Take a register from the appropriate temps list. It will be returned // automatically when the scope ends. Register AcquireW() { - return AcquireNextAvailable(masm_->GetScratchRegisterList()).W(); + return AcquireFrom(masm_->GetScratchRegisterList()).W(); } Register AcquireX() { - return AcquireNextAvailable(masm_->GetScratchRegisterList()).X(); + return AcquireFrom(masm_->GetScratchRegisterList()).X(); } VRegister AcquireH() { - return AcquireNextAvailable(masm_->GetScratchVRegisterList()).H(); + return AcquireFrom(masm_->GetScratchVRegisterList()).H(); } VRegister AcquireS() { - return AcquireNextAvailable(masm_->GetScratchVRegisterList()).S(); + return AcquireFrom(masm_->GetScratchVRegisterList()).S(); } VRegister AcquireD() { - return AcquireNextAvailable(masm_->GetScratchVRegisterList()).D(); + return AcquireFrom(masm_->GetScratchVRegisterList()).D(); + } + ZRegister AcquireZ() { + return AcquireFrom(masm_->GetScratchVRegisterList()).Z(); + } + PRegister AcquireP() { + // Prefer to allocate p8-p15 if we can, to leave p0-p7 available for use as + // governing predicates. + CPURegList* available = masm_->GetScratchPRegisterList(); + RegList preferred = ~kGoverningPRegisterMask; + if ((available->GetList() & preferred) != 0) { + return AcquireFrom(available, preferred).P(); + } + return AcquireFrom(available).P(); + } + // Acquire a P register suitable for use as a governing predicate in + // instructions which only accept p0-p7 for that purpose. + PRegister AcquireGoverningP() { + CPURegList* available = masm_->GetScratchPRegisterList(); + return AcquireFrom(available, kGoverningPRegisterMask).P(); } - Register AcquireRegisterOfSize(int size_in_bits); Register AcquireSameSizeAs(const Register& reg) { @@ -3875,6 +7135,12 @@ class UseScratchRegisterScope { : CPURegister(AcquireRegisterOfSize(size_in_bits)); } + // Acquire a register big enough to represent one lane of `vector`. + Register AcquireRegisterToHoldLane(const CPURegister& vector) { + VIXL_ASSERT(vector.GetLaneSizeInBits() <= kXRegSize); + return (vector.GetLaneSizeInBits() > kWRegSize) ? AcquireX() : AcquireW(); + } + // Explicitly release an acquired (or excluded) register, putting it back in // the appropriate temps list. @@ -3892,6 +7158,10 @@ class UseScratchRegisterScope { const VRegister& reg2 = NoVReg, const VRegister& reg3 = NoVReg, const VRegister& reg4 = NoVReg); + void Include(const CPURegister& reg1, + const CPURegister& reg2 = NoCPUReg, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); // Make sure that the specified registers are not available in this scope. @@ -3911,21 +7181,40 @@ class UseScratchRegisterScope { const CPURegister& reg3 = NoCPUReg, const CPURegister& reg4 = NoCPUReg); + // Convenience for excluding registers that are part of Operands. This is + // useful for sequences like this: + // + // // Use 'rd' as a scratch, but only if it's not aliased by an input. + // temps.Include(rd); + // temps.Exclude(rn); + // temps.Exclude(operand); + // + // Otherwise, a conditional check is needed on the last 'Exclude'. 
+ void Exclude(const Operand& operand) { + if (operand.IsShiftedRegister() || operand.IsExtendedRegister()) { + Exclude(operand.GetRegister()); + } else { + VIXL_ASSERT(operand.IsImmediate()); + } + } // Prevent any scratch registers from being used in this scope. void ExcludeAll(); private: - static CPURegister AcquireNextAvailable(CPURegList* available); + static CPURegister AcquireFrom(CPURegList* available, + RegList mask = ~static_cast<RegList>(0)); static void ReleaseByCode(CPURegList* available, int code); - static void ReleaseByRegList(CPURegList* available, RegList regs); - static void IncludeByRegList(CPURegList* available, RegList exclude); - static void ExcludeByRegList(CPURegList* available, RegList exclude); + CPURegList* GetAvailableListFor(CPURegister::RegisterBank bank); + + static const RegList kGoverningPRegisterMask = + (static_cast<RegList>(1) << kNumberOfGoverningPRegisters) - 1; + // The MacroAssembler maintains a list of available scratch registers, and // also keeps track of the most recently-opened scope so that on destruction // we can check that scopes do not outlive their parents. @@ -3934,7 +7223,8 @@ class UseScratchRegisterScope { // The state of the available lists at the start of this scope. RegList old_available_; // kRegister - RegList old_available_v_; // kVRegister + RegList old_available_v_; // kVRegister / kZRegister + RegList old_available_p_; // kPRegister // Disallow copy constructor and operator=. VIXL_NO_RETURN_IN_DEBUG_MODE UseScratchRegisterScope( @@ -3955,23 +7245,11 @@ class UseScratchRegisterScope { // features needs a corresponding macro instruction. class SimulationCPUFeaturesScope { public: - explicit SimulationCPUFeaturesScope( - MacroAssembler* masm, - CPUFeatures::Feature feature0 = CPUFeatures::kNone, - CPUFeatures::Feature feature1 = CPUFeatures::kNone, - CPUFeatures::Feature feature2 = CPUFeatures::kNone, - CPUFeatures::Feature feature3 = CPUFeatures::kNone) - : masm_(masm), - cpu_features_scope_(masm, feature0, feature1, feature2, feature3) { - masm_->SaveSimulatorCPUFeatures(); - masm_->EnableSimulatorCPUFeatures( - CPUFeatures(feature0, feature1, feature2, feature3)); - } - - SimulationCPUFeaturesScope(MacroAssembler* masm, const CPUFeatures& other) - : masm_(masm), cpu_features_scope_(masm, other) { + template <typename... T> + explicit SimulationCPUFeaturesScope(MacroAssembler* masm, T... features) + : masm_(masm), cpu_features_scope_(masm, features...) { masm_->SaveSimulatorCPUFeatures(); - masm_->EnableSimulatorCPUFeatures(other); + masm_->EnableSimulatorCPUFeatures(CPUFeatures(features...)); } ~SimulationCPUFeaturesScope() { masm_->RestoreSimulatorCPUFeatures(); } diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc new file mode 100644 index 00000000..b107f132 --- /dev/null +++ b/src/aarch64/macro-assembler-sve-aarch64.cc @@ -0,0 +1,2027 @@ +// Copyright 2019, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "macro-assembler-aarch64.h" + +namespace vixl { +namespace aarch64 { + +void MacroAssembler::AddSubHelper(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(imm.FitsInLane(zd)); + + // Simple, encodable cases. + if (TrySingleAddSub(option, zd, zn, imm)) return; + + VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate)); + bool add_imm = (option == kAddImmediate); + + // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one + // instruction. Also interpret the immediate as signed, so we can convert + // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc. + IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits())); + if (signed_imm.IsNegative()) { + AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate; + IntegerOperand n_imm(signed_imm.GetMagnitude()); + // IntegerOperand can represent -INT_MIN, so this is always safe. + VIXL_ASSERT(n_imm.IsPositiveOrZero()); + if (TrySingleAddSub(n_option, zd, zn, n_imm)) return; + } + + // Otherwise, fall back to dup + ADD_z_z/SUB_z_z. + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); + Dup(scratch, imm); + + SingleEmissionCheckScope guard(this); + if (add_imm) { + add(zd, zn, scratch); + } else { + sub(zd, zn, scratch); + } +} + +bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(imm.FitsInLane(zd)); + + int imm8; + int shift = -1; + if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { + MovprfxHelperScope guard(this, zd, zn); + switch (option) { + case kAddImmediate: + add(zd, zd, imm8, shift); + return true; + case kSubImmediate: + sub(zd, zd, imm8, shift); + return true; + } + } + return false; +} + +void MacroAssembler::IntWideImmHelper(IntWideImmFn imm_fn, + SVEArithPredicatedFn reg_macro, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm, + bool is_signed) { + if (is_signed) { + // E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi + if (imm.IsInt8()) { + MovprfxHelperScope guard(this, zd, zn); + (this->*imm_fn)(zd, zd, imm.AsInt8()); + return; + } + } else { + // E.g.
UMIN_z_zi, UMAX_z_zi + if (imm.IsUint8()) { + MovprfxHelperScope guard(this, zd, zn); + (this->*imm_fn)(zd, zd, imm.AsUint8()); + return; + } + } + + UseScratchRegisterScope temps(this); + PRegister pg = temps.AcquireGoverningP(); + Ptrue(pg.WithSameLaneSizeAs(zd)); + + // Try to re-use zd if we can, so we can avoid a movprfx. + ZRegister scratch = + zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()) + : zd; + Dup(scratch, imm); + + // The vector-form macro for commutative operations will swap the arguments to + // avoid movprfx, if necessary. + (this->*reg_macro)(zd, pg.Merging(), zn, scratch); +} + +void MacroAssembler::Mul(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + IntWideImmFn imm_fn = &Assembler::mul; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); +} + +void MacroAssembler::Smin(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInSignedLane(zd)); + IntWideImmFn imm_fn = &Assembler::smin; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); +} + +void MacroAssembler::Smax(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInSignedLane(zd)); + IntWideImmFn imm_fn = &Assembler::smax; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); +} + +void MacroAssembler::Umax(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); + IntWideImmFn imm_fn = &Assembler::umax; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); +} + +void MacroAssembler::Umin(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); + IntWideImmFn imm_fn = &Assembler::umin; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); +} + +void MacroAssembler::Addpl(const Register& xd, + const Register& xn, + int64_t multiplier) { + VIXL_ASSERT(allow_macro_instructions_); + + // This macro relies on `Rdvl` to handle some out-of-range cases. Check that + // `VL * multiplier` cannot overflow, for any possible value of VL. + VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes)); + VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes)); + + if (xd.IsZero()) return; + if (xn.IsZero() && xd.IsSP()) { + // TODO: This operation doesn't make much sense, but we could support it + // with a scratch register if necessary. + VIXL_UNIMPLEMENTED(); + } + + // Handling xzr requires an extra move, so defer it until later so we can try + // to use `rdvl` instead (via `Addvl`). + if (IsInt6(multiplier) && !xn.IsZero()) { + SingleEmissionCheckScope guard(this); + addpl(xd, xn, static_cast<int>(multiplier)); + return; + } + + // If `multiplier` is a multiple of 8, we can use `Addvl` instead. + if ((multiplier % kZRegBitsPerPRegBit) == 0) { + Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit); + return; + } + + if (IsInt6(multiplier)) { + VIXL_ASSERT(xn.IsZero()); // Other cases were handled with `addpl`. + // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so + // materialise a zero. 
+ MacroEmissionCheckScope guard(this); + movz(xd, 0); + addpl(xd, xd, static_cast<int>(multiplier)); + return; + } + + // TODO: Some probable cases result in rather long sequences. For example, + // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just + // outside the encodable range. We should look for ways to cover such cases + // without drastically increasing the complexity of this logic. + + // For other cases, calculate xn + (PL * multiplier) using discrete + // instructions. This requires two scratch registers in the general case, so + // try to re-use the destination as a scratch register. + UseScratchRegisterScope temps(this); + temps.Include(xd); + temps.Exclude(xn); + + Register scratch = temps.AcquireX(); + // There is no `rdpl`, so we have to calculate PL from VL. We can't + // scale the multiplier because (we already know) it isn't a multiple of 8. + Rdvl(scratch, multiplier); + + MacroEmissionCheckScope guard(this); + if (xn.IsZero()) { + asr(xd, scratch, kZRegBitsPerPRegBitLog2); + } else if (xd.IsSP() || xn.IsSP()) { + // TODO: MacroAssembler::Add should be able to handle this. + asr(scratch, scratch, kZRegBitsPerPRegBitLog2); + add(xd, xn, scratch); + } else { + add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2)); + } +} + +void MacroAssembler::Addvl(const Register& xd, + const Register& xn, + int64_t multiplier) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(xd.IsX()); + VIXL_ASSERT(xn.IsX()); + + // Check that `VL * multiplier` cannot overflow, for any possible value of VL. + VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes)); + VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes)); + + if (xd.IsZero()) return; + if (xn.IsZero() && xd.IsSP()) { + // TODO: This operation doesn't make much sense, but we could support it + // with a scratch register if necessary. `rdvl` cannot write into `sp`. + VIXL_UNIMPLEMENTED(); + } + + if (IsInt6(multiplier)) { + SingleEmissionCheckScope guard(this); + if (xn.IsZero()) { + rdvl(xd, static_cast<int>(multiplier)); + } else { + addvl(xd, xn, static_cast<int>(multiplier)); + } + return; + } + + // TODO: Some probable cases result in rather long sequences. For example, + // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just + // outside the encodable range. We should look for ways to cover such cases + // without drastically increasing the complexity of this logic. + + // For other cases, calculate xn + (VL * multiplier) using discrete + // instructions. This requires two scratch registers in the general case, so + // we try to re-use the destination as a scratch register. + UseScratchRegisterScope temps(this); + temps.Include(xd); + temps.Exclude(xn); + + Register a = temps.AcquireX(); + Mov(a, multiplier); + + MacroEmissionCheckScope guard(this); + Register b = temps.AcquireX(); + rdvl(b, 1); + if (xn.IsZero()) { + mul(xd, a, b); + } else if (xd.IsSP() || xn.IsSP()) { + mul(a, a, b); + add(xd, xn, a); + } else { + madd(xd, a, b, xn); + } +} + +void MacroAssembler::CalculateSVEAddress(const Register& xd, + const SVEMemOperand& addr, + int vl_divisor_log2) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!addr.IsScatterGather()); + VIXL_ASSERT(xd.IsX()); + + // The lower bound is where a whole Z register is accessed. + VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0)); + // The upper bound is for P register accesses, and for instructions like + // "st1b { z0.d } [...]", where one byte is accessed for every D-sized lane.
+ VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2)); + + SVEOffsetModifier mod = addr.GetOffsetModifier(); + Register base = addr.GetScalarBase(); + + if (addr.IsEquivalentToScalar()) { + // For example: + // [x0] + // [x0, #0] + // [x0, xzr, LSL 2] + Mov(xd, base); + } else if (addr.IsScalarPlusImmediate()) { + // For example: + // [x0, #42] + // [x0, #42, MUL VL] + int64_t offset = addr.GetImmediateOffset(); + VIXL_ASSERT(offset != 0); // Handled by IsEquivalentToScalar. + if (addr.IsMulVl()) { + int vl_divisor = 1 << vl_divisor_log2; + // For all possible values of vl_divisor, we can simply use `Addpl`. This + // will select `addvl` if necessary. + VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0); + Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor)); + } else { + // IsScalarPlusImmediate() ensures that no other modifiers can occur. + VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); + Add(xd, base, offset); + } + } else if (addr.IsScalarPlusScalar()) { + // For example: + // [x0, x1] + // [x0, x1, LSL #4] + Register offset = addr.GetScalarOffset(); + VIXL_ASSERT(!offset.IsZero()); // Handled by IsEquivalentToScalar. + if (mod == SVE_LSL) { + Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount())); + } else { + // IsScalarPlusScalar() ensures that no other modifiers can occur. + VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); + Add(xd, base, offset); + } + } else { + // All other forms are scatter-gather addresses, which cannot be evaluated + // into an X register. + VIXL_UNREACHABLE(); + } +} + +void MacroAssembler::Cpy(const ZRegister& zd, + const PRegister& pg, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInLane(zd)); + int imm8; + int shift; + if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { + SingleEmissionCheckScope guard(this); + cpy(zd, pg, imm8, shift); + return; + } + + // The fallbacks rely on `cpy` variants that only support merging predication. + // If zeroing predication was requested, zero the destination first. + if (pg.IsZeroing()) { + SingleEmissionCheckScope guard(this); + dup(zd, 0); + } + PRegisterM pg_m = pg.Merging(); + + // Try to encode the immediate using fcpy. + VIXL_ASSERT(imm.FitsInLane(zd)); + if (zd.GetLaneSizeInBits() >= kHRegSize) { + double fp_imm = 0.0; + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + fp_imm = + FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN); + break; + case kSRegSize: + fp_imm = RawbitsToFloat(imm.AsUint32()); + break; + case kDRegSize: + fp_imm = RawbitsToDouble(imm.AsUint64()); + break; + default: + VIXL_UNREACHABLE(); + break; + } + // IsImmFP64 is equivalent to IsImmFP<n> for the same arithmetic value, so + // we can use IsImmFP64 for all lane sizes. + if (IsImmFP64(fp_imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg_m, fp_imm); + return; + } + } + + // Fall back to using a scratch register. + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireRegisterToHoldLane(zd); + Mov(scratch, imm); + + SingleEmissionCheckScope guard(this); + cpy(zd, pg_m, scratch); +} + +// TODO: We implement Fcpy (amongst other things) for all FP types because it +// allows us to preserve user-specified NaNs. We should come up with some +// FPImmediate type to abstract this, and avoid all the duplication below (and +// elsewhere). 
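To make the TODO's NaN-preservation point concrete, here is a short hypothetical sketch; the choice of z0 and p1 and the payload value are assumptions, not part of this patch. A NaN is never an encodable FP immediate, so Fcpy falls back to Cpy with the raw bit pattern and the user-specified payload survives bit-for-bit.

#include "aarch64/macro-assembler-aarch64.h"

using namespace vixl;
using namespace vixl::aarch64;

// Broadcast a quiet NaN carrying payload 0x123 to the D-sized lanes of z0.
// IsImmFP64() rejects every NaN, so the macro routes the value through Cpy
// as a raw 64-bit pattern instead of fcpy's 8-bit immediate form, which
// cannot represent it.
void BroadcastNaNWithPayload(MacroAssembler* masm) {
  const double nan_with_payload = RawbitsToDouble(0x7ff8000000000123);
  masm->Ptrue(p1.VnD());
  masm->Fcpy(z0.VnD(), p1.Merging(), nan_with_payload);
}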
+ +void MacroAssembler::Fcpy(const ZRegister& zd, + const PRegisterM& pg, + double imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pg.IsMerging()); + + if (IsImmFP64(imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg, imm); + return; + } + + // As a fall-back, cast the immediate to the required lane size, and try to + // encode the bit pattern using `Cpy`. + Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); +} + +void MacroAssembler::Fcpy(const ZRegister& zd, + const PRegisterM& pg, + float imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pg.IsMerging()); + + if (IsImmFP32(imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg, imm); + return; + } + + // As a fall-back, cast the immediate to the required lane size, and try to + // encode the bit pattern using `Cpy`. + Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); +} + +void MacroAssembler::Fcpy(const ZRegister& zd, + const PRegisterM& pg, + Float16 imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pg.IsMerging()); + + if (IsImmFP16(imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg, imm); + return; + } + + // As a fall-back, cast the immediate to the required lane size, and try to + // encode the bit pattern using `Cpy`. + Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); +} + +void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInLane(zd)); + unsigned lane_size = zd.GetLaneSizeInBits(); + int imm8; + int shift; + if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { + SingleEmissionCheckScope guard(this); + dup(zd, imm8, shift); + } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) { + SingleEmissionCheckScope guard(this); + dupm(zd, imm.AsUintN(lane_size)); + } else { + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireRegisterToHoldLane(zd); + Mov(scratch, imm); + + SingleEmissionCheckScope guard(this); + dup(zd, scratch); + } +} + +void MacroAssembler::NoncommutativeArithmeticHelper( + const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + SVEArithPredicatedFn rev_fn) { + if (zd.Aliases(zn)) { + // E.g. zd = zd / zm + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, pg, zn, zm); + } else if (zd.Aliases(zm)) { + // E.g. zd = zn / zd + SingleEmissionCheckScope guard(this); + (this->*rev_fn)(zd, pg, zm, zn); + } else { + // E.g. zd = zn / zm + MovprfxHelperScope guard(this, zd, pg, zn); + (this->*fn)(zd, pg, zd, zm); + } +} + +void MacroAssembler::FPCommutativeArithmeticHelper( + const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + FPMacroNaNPropagationOption nan_option) { + ResolveFPNaNPropagationOption(&nan_option); + + if (zd.Aliases(zn)) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, pg, zd, zm); + } else if (zd.Aliases(zm)) { + switch (nan_option) { + case FastNaNPropagation: { + // Swap the arguments. + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, pg, zd, zn); + return; + } + case StrictNaNPropagation: { + UseScratchRegisterScope temps(this); + // Use a scratch register to keep the argument order exactly as + // specified. 
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); + { + MovprfxHelperScope guard(this, scratch, pg, zn); + (this->*fn)(scratch, pg, scratch, zm); + } + Mov(zd, scratch); + return; + } + case NoFPMacroNaNPropagationSelected: + VIXL_UNREACHABLE(); + return; + } + } else { + MovprfxHelperScope guard(this, zd, pg, zn); + (this->*fn)(zd, pg, zd, zm); + } +} + +void MacroAssembler::Asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::asr), + static_cast<SVEArithPredicatedFn>( + &Assembler::asrr)); +} + +void MacroAssembler::Lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::lsl), + static_cast<SVEArithPredicatedFn>( + &Assembler::lslr)); +} + +void MacroAssembler::Lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::lsr), + static_cast<SVEArithPredicatedFn>( + &Assembler::lsrr)); +} + +void MacroAssembler::Fdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fdiv), + static_cast<SVEArithPredicatedFn>( + &Assembler::fdivr)); +} + +void MacroAssembler::Fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fsub), + static_cast<SVEArithPredicatedFn>( + &Assembler::fsubr)); +} + +void MacroAssembler::Fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fadd), + nan_option); +} + +void MacroAssembler::Fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fabd), + nan_option); +} + +void MacroAssembler::Fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fmul), + nan_option); +} + +void MacroAssembler::Fmulx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fmulx), + nan_option); +} + +void MacroAssembler::Fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption 
nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fmax), + nan_option); +} + +void MacroAssembler::Fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fmin), + nan_option); +} + +void MacroAssembler::Fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fmaxnm), + nan_option); +} + +void MacroAssembler::Fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fminnm), + nan_option); +} + +void MacroAssembler::Fdup(const ZRegister& zd, double imm) { + VIXL_ASSERT(allow_macro_instructions_); + + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + Fdup(zd, Float16(imm)); + break; + case kSRegSize: + Fdup(zd, static_cast<float>(imm)); + break; + case kDRegSize: + if (IsImmFP64(imm)) { + SingleEmissionCheckScope guard(this); + fdup(zd, imm); + } else { + Dup(zd, DoubleToRawbits(imm)); + } + break; + } +} + +void MacroAssembler::Fdup(const ZRegister& zd, float imm) { + VIXL_ASSERT(allow_macro_instructions_); + + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + Fdup(zd, Float16(imm)); + break; + case kSRegSize: + if (IsImmFP32(imm)) { + SingleEmissionCheckScope guard(this); + fdup(zd, imm); + } else { + Dup(zd, FloatToRawbits(imm)); + } + break; + case kDRegSize: + Fdup(zd, static_cast<double>(imm)); + break; + } +} + +void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) { + VIXL_ASSERT(allow_macro_instructions_); + + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + if (IsImmFP16(imm)) { + SingleEmissionCheckScope guard(this); + fdup(zd, imm); + } else { + Dup(zd, Float16ToRawbits(imm)); + } + break; + case kSRegSize: + Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN)); + break; + case kDRegSize: + Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN)); + break; + } +} + +void MacroAssembler::Index(const ZRegister& zd, + const Operand& start, + const Operand& step) { + class IndexOperand : public Operand { + public: + static IndexOperand Prepare(MacroAssembler* masm, + UseScratchRegisterScope* temps, + const Operand& op, + const ZRegister& zd) { + // Look for encodable immediates. + int imm; + if (op.IsImmediate()) { + if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd, &imm)) { + return IndexOperand(imm); + } + Register scratch = temps->AcquireRegisterToHoldLane(zd); + masm->Mov(scratch, op); + return IndexOperand(scratch); + } else { + // Plain registers can be encoded directly. 
+ VIXL_ASSERT(op.IsPlainRegister()); + return IndexOperand(op.GetRegister()); + } + } + + int GetImm5() const { + int64_t imm = GetImmediate(); + VIXL_ASSERT(IsInt5(imm)); + return static_cast<int>(imm); + } + + private: + explicit IndexOperand(const Register& reg) : Operand(reg) {} + explicit IndexOperand(int64_t imm) : Operand(imm) {} + }; + + UseScratchRegisterScope temps(this); + IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd); + IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd); + + SingleEmissionCheckScope guard(this); + if (start_enc.IsImmediate()) { + if (step_enc.IsImmediate()) { + index(zd, start_enc.GetImm5(), step_enc.GetImm5()); + } else { + index(zd, start_enc.GetImm5(), step_enc.GetRegister()); + } + } else { + if (step_enc.IsImmediate()) { + index(zd, start_enc.GetRegister(), step_enc.GetImm5()); + } else { + index(zd, start_enc.GetRegister(), step_enc.GetRegister()); + } + } +} + +void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInLane(zdn)); + + if (imm.IsZero()) { + SingleEmissionCheckScope guard(this); + insr(zdn, xzr); + return; + } + + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireRegisterToHoldLane(zdn); + + // TODO: There are many cases where we could optimise immediates, such as by + // detecting repeating patterns or FP immediates. We should optimise and + // abstract this for use in other SVE mov-immediate-like macros. + Mov(scratch, imm); + + SingleEmissionCheckScope guard(this); + insr(zdn, scratch); +} + +void MacroAssembler::Mla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(za)) { + // zda = zda + (zn * zm) + SingleEmissionCheckScope guard(this); + mla(zd, pg, zn, zm); + } else if (zd.Aliases(zn)) { + // zdn = za + (zdn * zm) + SingleEmissionCheckScope guard(this); + mad(zd, pg, zm, za); + } else if (zd.Aliases(zm)) { + // Multiplication is commutative, so we can swap zn and zm. + // zdm = za + (zdm * zn) + SingleEmissionCheckScope guard(this); + mad(zd, pg, zn, za); + } else { + // zd = za + (zn * zm) + ExactAssemblyScope guard(this, 2 * kInstructionSize); + movprfx(zd, pg, za); + mla(zd, pg, zn, zm); + } +} + +void MacroAssembler::Mls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(za)) { + // zda = zda - (zn * zm) + SingleEmissionCheckScope guard(this); + mls(zd, pg, zn, zm); + } else if (zd.Aliases(zn)) { + // zdn = za - (zdn * zm) + SingleEmissionCheckScope guard(this); + msb(zd, pg, zm, za); + } else if (zd.Aliases(zm)) { + // Multiplication is commutative, so we can swap zn and zm. 
+ // zdm = za - (zdm * zn) + SingleEmissionCheckScope guard(this); + msb(zd, pg, zn, za); + } else { + // zd = za - (zn * zm) + ExactAssemblyScope guard(this, 2 * kInstructionSize); + movprfx(zd, pg, za); + mls(zd, pg, zn, zm); + } +} + +void MacroAssembler::CompareHelper(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + UseScratchRegisterScope temps(this); + ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); + Dup(zm, imm); + SingleEmissionCheckScope guard(this); + cmp(cond, pd, pg, zn, zm); +} + +void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pd.IsLaneSizeB()); + VIXL_ASSERT(pn.IsLaneSizeB()); + if (pd.Is(pn)) { + SingleEmissionCheckScope guard(this); + pfirst(pd, pg, pn); + } else { + UseScratchRegisterScope temps(this); + PRegister temp_pg = pg; + if (pd.Aliases(pg)) { + temp_pg = temps.AcquireP(); + Mov(temp_pg.VnB(), pg.VnB()); + } + Mov(pd, pn); + SingleEmissionCheckScope guard(this); + pfirst(pd, temp_pg, pd); + } +} + +void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(pd, pn)); + if (pd.Is(pn)) { + SingleEmissionCheckScope guard(this); + pnext(pd, pg, pn); + } else { + UseScratchRegisterScope temps(this); + PRegister temp_pg = pg; + if (pd.Aliases(pg)) { + temp_pg = temps.AcquireP(); + Mov(temp_pg.VnB(), pg.VnB()); + } + Mov(pd.VnB(), pn.VnB()); + SingleEmissionCheckScope guard(this); + pnext(pd, temp_pg, pd); + } +} + +void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd, + SVEPredicateConstraint pattern, + FlagsUpdate s) { + VIXL_ASSERT(allow_macro_instructions_); + switch (s) { + case LeaveFlags: + Ptrue(pd, pattern); + return; + case SetFlags: + Ptrues(pd, pattern); + return; + } + VIXL_UNREACHABLE(); +} + +void MacroAssembler::Sdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::sdiv), + static_cast<SVEArithPredicatedFn>( + &Assembler::sdivr)); +} + +void MacroAssembler::Sub(const ZRegister& zd, + IntegerOperand imm, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + + int imm8; + int shift = -1; + if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { + MovprfxHelperScope guard(this, zd, zm); + subr(zd, zd, imm8, shift); + } else { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits()); + Dup(scratch, imm); + + SingleEmissionCheckScope guard(this); + sub(zd, scratch, zm); + } +} + +void MacroAssembler::Sub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::sub), + static_cast<SVEArithPredicatedFn>( + &Assembler::subr)); +} + +void MacroAssembler::Udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + 
&Assembler::udiv), + static_cast<SVEArithPredicatedFn>( + &Assembler::udivr)); +} + +void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + SVELoadBroadcastFn fn, + int divisor) { + VIXL_ASSERT(addr.IsScalarPlusImmediate()); + int64_t imm = addr.GetImmediateOffset(); + if ((imm % divisor == 0) && IsUint6(imm / divisor)) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + } else { + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX(); + CalculateSVEAddress(scratch, addr, zt); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(scratch)); + } +} + +void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt, + const SVEMemOperand& addr, + SVELoadStoreFn fn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister()); + + if (addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) && + addr.IsMulVl())) { + SingleEmissionCheckScope guard(this); + (this->*fn)(rt, addr); + return; + } + + if (addr.IsEquivalentToScalar()) { + SingleEmissionCheckScope guard(this); + (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase())); + return; + } + + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX(); + CalculateSVEAddress(scratch, addr, rt); + SingleEmissionCheckScope guard(this); + (this->*fn)(rt, SVEMemOperand(scratch)); +} + +template <typename Tg, typename Tf> +void MacroAssembler::SVELoadStoreScalarImmHelper( + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn, + int imm_bits, + int shift_amount, + SVEOffsetModifier supported_modifier, + int vl_divisor_log2) { + VIXL_ASSERT(allow_macro_instructions_); + int imm_divisor = 1 << shift_amount; + + if (addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && + IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) && + ((addr.GetImmediateOffset() % imm_divisor) == 0) && + (addr.GetOffsetModifier() == supported_modifier))) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + if (addr.IsEquivalentToScalar()) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); + return; + } + + if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) && + (vl_divisor_log2 == -1)) { + // We don't handle [x0, #imm, MUL VL] if the in-memory access size is not VL + // dependent. 
+ VIXL_UNIMPLEMENTED(); + } + + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX(); + CalculateSVEAddress(scratch, addr, vl_divisor_log2); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(scratch)); +} + +template <typename Tg, typename Tf> +void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2, + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn) { + if (addr.IsPlainScalar() || + (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() && + addr.IsEquivalentToLSL(msize_in_bytes_log2)) || + (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) && + addr.IsMulVl())) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + if (addr.IsEquivalentToScalar()) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); + return; + } + + if (addr.IsVectorPlusImmediate()) { + uint64_t offset = addr.GetImmediateOffset(); + if (IsMultiple(offset, (1 << msize_in_bytes_log2)) && + IsUint5(offset >> msize_in_bytes_log2)) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + } + + if (addr.IsScalarPlusVector()) { + VIXL_ASSERT(addr.IsScatterGather()); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + UseScratchRegisterScope temps(this); + if (addr.IsScatterGather()) { + // In scatter-gather modes, zt and zn/zm have the same lane size. However, + // for 32-bit accesses, the result of each lane's address calculation still + // requires 64 bits; we can't naively use `Adr` for the address calculation + // because it would truncate each address to 32 bits. + + if (addr.IsVectorPlusImmediate()) { + // Synthesise the immediate in an X register, then use a + // scalar-plus-vector access with the original vector. + Register scratch = temps.AcquireX(); + Mov(scratch, addr.GetImmediateOffset()); + SingleEmissionCheckScope guard(this); + SVEOffsetModifier om = + zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER; + (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om)); + return; + } + + VIXL_UNIMPLEMENTED(); + } else { + Register scratch = temps.AcquireX(); + // TODO: If we have an immediate offset that is a multiple of + // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to + // save an instruction. + int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2; + CalculateSVEAddress(scratch, addr, vl_divisor_log2); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(scratch)); + } +} + +template <typename Tf> +void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + Tf fn) { + if (addr.IsScatterGather()) { + // Scatter-gather first-fault loads share encodings with normal loads. + SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn); + return; + } + + // Contiguous first-faulting loads have no scalar-plus-immediate form at all, + // so we don't do immediate synthesis. + + // We cannot currently distinguish "[x0]" from "[x0, #0]", and this + // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here. 
+ if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() && + addr.IsEquivalentToLSL(msize_in_bytes_log2))) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + VIXL_UNIMPLEMENTED(); +} + +void MacroAssembler::Ld1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kBRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1b)); +} + +void MacroAssembler::Ld1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kHRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1h)); +} + +void MacroAssembler::Ld1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kWRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1w)); +} + +void MacroAssembler::Ld1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kDRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1d)); +} + +void MacroAssembler::Ld1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kBRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1sb)); +} + +void MacroAssembler::Ld1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kHRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1sh)); +} + +void MacroAssembler::Ld1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kSRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1sw)); +} + +void MacroAssembler::St1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kBRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVEStore1Fn>(&Assembler::st1b)); +} + +void MacroAssembler::St1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kHRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVEStore1Fn>(&Assembler::st1h)); +} + +void MacroAssembler::St1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kSRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVEStore1Fn>(&Assembler::st1w)); +} + +void MacroAssembler::St1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kDRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVEStore1Fn>(&Assembler::st1d)); +} + +void MacroAssembler::Ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kBRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1b)); +} + +void MacroAssembler::Ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kHRegSizeInBytesLog2, + zt, + pg, + 
addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1h)); +} + +void MacroAssembler::Ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kSRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1w)); +} + +void MacroAssembler::Ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kDRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1d)); +} + +void MacroAssembler::Ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kBRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1sb)); +} + +void MacroAssembler::Ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kHRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1sh)); +} + +void MacroAssembler::Ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kSRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1sw)); +} + +void MacroAssembler::Ld1rqb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rqb, + 4, + 4, + NO_SVE_OFFSET_MODIFIER, + -1); +} + +void MacroAssembler::Ld1rqd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rqd, + 4, + 4, + NO_SVE_OFFSET_MODIFIER, + -1); +} + +void MacroAssembler::Ld1rqh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rqh, + 4, + 4, + NO_SVE_OFFSET_MODIFIER, + -1); +} + +void MacroAssembler::Ld1rqw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rqw, + 4, + 4, + NO_SVE_OFFSET_MODIFIER, + -1); +} + +void MacroAssembler::Ldnt1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1b, + 4, + 0, + SVE_MUL_VL); +} + +void MacroAssembler::Ldnt1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1d, + 4, + 0, + SVE_MUL_VL); +} + +void MacroAssembler::Ldnt1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1h, + 4, + 0, + SVE_MUL_VL); +} + +void MacroAssembler::Ldnt1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1w, + 4, + 0, + SVE_MUL_VL); +} + +void MacroAssembler::Stnt1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) 
{ + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::stnt1b, + 4, + 0, + SVE_MUL_VL); +} +void MacroAssembler::Stnt1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::stnt1d, + 4, + 0, + SVE_MUL_VL); +} +void MacroAssembler::Stnt1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::stnt1h, + 4, + 0, + SVE_MUL_VL); +} +void MacroAssembler::Stnt1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::stnt1w, + 4, + 0, + SVE_MUL_VL); +} + +void MacroAssembler::SVESdotUdotIndexHelper(IntArithIndexFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + if (zd.Aliases(za)) { + // zda = zda + (zn . zm) + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, zn, zm, index); + + } else if (zd.Aliases(zn) || zd.Aliases(zm)) { + // zdn = za + (zdn . zm[index]) + // zdm = za + (zn . zdm[index]) + // zdnm = za + (zdnm . zdnm[index]) + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, zn, zm, index); + } + + Mov(zd, scratch); + } else { + // zd = za + (zn . zm) + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm, index); + } +} + +void MacroAssembler::SVESdotUdotHelper(IntArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + if (zd.Aliases(za)) { + // zda = zda + (zn . zm) + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, zn, zm); + + } else if (zd.Aliases(zn) || zd.Aliases(zm)) { + // zdn = za + (zdn . zm) + // zdm = za + (zn . zdm) + // zdnm = za + (zdnm . zdnm) + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, zn, zm); + } + + Mov(zd, scratch); + } else { + // zd = za + (zn . 
zm) + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm); + } +} + +void MacroAssembler::Fscale(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); + Mov(scratch, zm); + MovprfxHelperScope guard(this, zd, pg, zn); + fscale(zd, pg, zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, pg, zn); + fscale(zd, pg, zd, zm); + } +} + +void MacroAssembler::Sdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SVESdotUdotHelper(&Assembler::sdot, zd, za, zn, zm); +} + +void MacroAssembler::Sdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SVESdotUdotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index); +} + +void MacroAssembler::Udot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SVESdotUdotHelper(&Assembler::udot, zd, za, zn, zm); +} + +void MacroAssembler::Udot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SVESdotUdotIndexHelper(&Assembler::udot, zd, za, zn, zm, index); +} + +void MacroAssembler::FPMulAddHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + SVEMulAddPredicatedZdaFn fn_zda, + SVEMulAddPredicatedZdnFn fn_zdn, + FPMacroNaNPropagationOption nan_option) { + ResolveFPNaNPropagationOption(&nan_option); + + if (zd.Aliases(za)) { + // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. + SingleEmissionCheckScope guard(this); + (this->*fn_zda)(zd, pg, zn, zm); + } else if (zd.Aliases(zn)) { + // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb. + SingleEmissionCheckScope guard(this); + (this->*fn_zdn)(zd, pg, zm, za); + } else if (zd.Aliases(zm)) { + switch (nan_option) { + case FastNaNPropagation: { + // We treat multiplication as commutative in the fast mode, so we can + // swap zn and zm. + // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb. + SingleEmissionCheckScope guard(this); + (this->*fn_zdn)(zd, pg, zn, za); + return; + } + case StrictNaNPropagation: { + UseScratchRegisterScope temps(this); + // Use a scratch register to keep the argument order exactly as + // specified. + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); + { + MovprfxHelperScope guard(this, scratch, pg, za); + // scratch = (-)za + ((-)zn * zm) + (this->*fn_zda)(scratch, pg, zn, zm); + } + Mov(zd, scratch); + return; + } + case NoFPMacroNaNPropagationSelected: + VIXL_UNREACHABLE(); + return; + } + } else { + // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. 
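The `nan_option` argument controls how much freedom this helper has: FastNaNPropagation permits the operand commutation shown above, while StrictNaNPropagation preserves the written operand order by accumulating into a scratch register first. A sketch, assuming a MacroAssembler named `masm` and hypothetical registers:

    // zd aliases zm. In fast mode this becomes a single fmad with zn and za
    // swapped, so NaN payloads may propagate from a different operand.
    masm.Fmla(z0.VnD(), p1.Merging(), z4.VnD(), z5.VnD(), z0.VnD(), FastNaNPropagation);
    // In strict mode the helper accumulates into a scratch register (movprfx,
    // fmla, mov), so NaN propagation matches the un-aliased form exactly.
    masm.Fmla(z0.VnD(), p1.Merging(), z4.VnD(), z5.VnD(), z0.VnD(), StrictNaNPropagation);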
+ MovprfxHelperScope guard(this, zd, pg, za); + (this->*fn_zda)(zd, pg, zn, zm); + } +} + +void MacroAssembler::FPMulAddIndexHelper(SVEMulAddIndexFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + if (zd.Aliases(za)) { + // zda = zda + (zn * zm[i]) + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, zn, zm, index); + + } else if (zd.Aliases(zn) || zd.Aliases(zm)) { + // zdn = za + (zdn * zm[i]) + // zdm = za + (zn * zdm[i]) + // zdnm = za + (zdnm * zdnm[i]) + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, zn, zm, index); + } + Mov(zd, scratch); + } else { + // zd = za + (zn * zm[i]) + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm, index); + } +} + +void MacroAssembler::Fmla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fmla, + &Assembler::fmad, + nan_option); +} + +void MacroAssembler::Fmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddIndexHelper(&Assembler::fmla, zd, za, zn, zm, index); +} + +void MacroAssembler::Fmls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fmls, + &Assembler::fmsb, + nan_option); +} + +void MacroAssembler::Fmls(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddIndexHelper(&Assembler::fmls, zd, za, zn, zm, index); +} + +void MacroAssembler::Fnmla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fnmla, + &Assembler::fnmad, + nan_option); +} + +void MacroAssembler::Fnmls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fnmls, + &Assembler::fnmsb, + nan_option); +} + +void MacroAssembler::Ftmad(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm3) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); + Mov(scratch, zm); + MovprfxHelperScope guard(this, zd, zn); + ftmad(zd, zd, scratch, imm3); + } else { + MovprfxHelperScope guard(this, zd, zn); + ftmad(zd, zd, zm, imm3); + } +} + +void MacroAssembler::Fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, pg, zn); + 
fcadd(scratch, pg, scratch, zm, rot); + } + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, pg, zn); + fcadd(zd, pg, zd, zm, rot); + } +} + +void MacroAssembler::Ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + // zd = ext(zn, zd, offset) + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, zn); + ext(scratch, scratch, zm, offset); + } + Mov(zd, scratch); + } else { + // zd = ext(zn, zm, offset) + // zd = ext(zd, zd, offset) + MovprfxHelperScope guard(this, zd, zn); + ext(zd, zd, zm, offset); + } +} + +void MacroAssembler::Splice(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, zn); + splice(scratch, pg, scratch, zm); + } + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, zn); + splice(zd, pg, zd, zm); + } +} + +void MacroAssembler::Clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, zn); + clasta(scratch, pg, scratch, zm); + } + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, zn); + clasta(zd, pg, zd, zm); + } +} + +void MacroAssembler::Clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, zn); + clastb(scratch, pg, scratch, zm); + } + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, zn); + clastb(zd, pg, zd, zm); + } +} + +} // namespace aarch64 +} // namespace vixl diff --git a/src/aarch64/operands-aarch64.cc b/src/aarch64/operands-aarch64.cc index 20364616..008179e4 100644 --- a/src/aarch64/operands-aarch64.cc +++ b/src/aarch64/operands-aarch64.cc @@ -30,32 +30,32 @@ namespace vixl { namespace aarch64 { // CPURegList utilities. 
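The hunk below adds an optional `mask` argument to the pop helpers: only registers present in both the list and the mask are considered, and an empty intersection returns NoCPUReg. A usage sketch, with hypothetical registers:

    CPURegList list(x2, x3, x10);
    // Restrict the pop to {x3, x10}; x2 stays in the list.
    CPURegister lo = list.PopLowestIndex(x3.GetBit() | x10.GetBit());  // pops x3
    CPURegister hi = list.PopHighestIndex(x10.GetBit());               // pops x10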
-CPURegister CPURegList::PopLowestIndex() { - if (IsEmpty()) { - return NoCPUReg; - } - int index = CountTrailingZeros(list_); - VIXL_ASSERT((1 << index) & list_); +CPURegister CPURegList::PopLowestIndex(RegList mask) { + RegList list = list_ & mask; + if (list == 0) return NoCPUReg; + int index = CountTrailingZeros(list); + VIXL_ASSERT(((1 << index) & list) != 0); Remove(index); return CPURegister(index, size_, type_); } -CPURegister CPURegList::PopHighestIndex() { - VIXL_ASSERT(IsValid()); - if (IsEmpty()) { - return NoCPUReg; - } - int index = CountLeadingZeros(list_); +CPURegister CPURegList::PopHighestIndex(RegList mask) { + RegList list = list_ & mask; + if (list == 0) return NoCPUReg; + int index = CountLeadingZeros(list); index = kRegListSizeInBits - 1 - index; - VIXL_ASSERT((1 << index) & list_); + VIXL_ASSERT(((1 << index) & list) != 0); Remove(index); return CPURegister(index, size_, type_); } bool CPURegList::IsValid() const { - if ((type_ == CPURegister::kRegister) || (type_ == CPURegister::kVRegister)) { + if (type_ == CPURegister::kNoRegister) { + // We can't use IsEmpty here because that asserts IsValid(). + return list_ == 0; + } else { bool is_valid = true; // Try to create a CPURegister for each element in the list. for (int i = 0; i < kRegListSizeInBits; i++) { @@ -64,11 +64,6 @@ bool CPURegList::IsValid() const { } } return is_valid; - } else if (type_ == CPURegister::kNoRegister) { - // We can't use IsEmpty here because that asserts IsValid(). - return list_ == 0; - } else { - return false; } } @@ -149,145 +144,6 @@ const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV(); const CPURegList kCallerSaved = CPURegList::GetCallerSaved(); const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV(); - -// Registers. -#define WREG(n) w##n, -const Register Register::wregisters[] = {AARCH64_REGISTER_CODE_LIST(WREG)}; -#undef WREG - -#define XREG(n) x##n, -const Register Register::xregisters[] = {AARCH64_REGISTER_CODE_LIST(XREG)}; -#undef XREG - -#define BREG(n) b##n, -const VRegister VRegister::bregisters[] = {AARCH64_REGISTER_CODE_LIST(BREG)}; -#undef BREG - -#define HREG(n) h##n, -const VRegister VRegister::hregisters[] = {AARCH64_REGISTER_CODE_LIST(HREG)}; -#undef HREG - -#define SREG(n) s##n, -const VRegister VRegister::sregisters[] = {AARCH64_REGISTER_CODE_LIST(SREG)}; -#undef SREG - -#define DREG(n) d##n, -const VRegister VRegister::dregisters[] = {AARCH64_REGISTER_CODE_LIST(DREG)}; -#undef DREG - -#define QREG(n) q##n, -const VRegister VRegister::qregisters[] = {AARCH64_REGISTER_CODE_LIST(QREG)}; -#undef QREG - -#define VREG(n) v##n, -const VRegister VRegister::vregisters[] = {AARCH64_REGISTER_CODE_LIST(VREG)}; -#undef VREG - - -const Register& Register::GetWRegFromCode(unsigned code) { - if (code == kSPRegInternalCode) { - return wsp; - } else { - VIXL_ASSERT(code < kNumberOfRegisters); - return wregisters[code]; - } -} - - -const Register& Register::GetXRegFromCode(unsigned code) { - if (code == kSPRegInternalCode) { - return sp; - } else { - VIXL_ASSERT(code < kNumberOfRegisters); - return xregisters[code]; - } -} - - -const VRegister& VRegister::GetBRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return bregisters[code]; -} - - -const VRegister& VRegister::GetHRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return hregisters[code]; -} - - -const VRegister& VRegister::GetSRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return sregisters[code]; -} - - -const VRegister& 
VRegister::GetDRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return dregisters[code]; -} - - -const VRegister& VRegister::GetQRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return qregisters[code]; -} - - -const VRegister& VRegister::GetVRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return vregisters[code]; -} - - -const Register& CPURegister::W() const { - VIXL_ASSERT(IsValidRegister()); - return Register::GetWRegFromCode(code_); -} - - -const Register& CPURegister::X() const { - VIXL_ASSERT(IsValidRegister()); - return Register::GetXRegFromCode(code_); -} - - -const VRegister& CPURegister::B() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetBRegFromCode(code_); -} - - -const VRegister& CPURegister::H() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetHRegFromCode(code_); -} - - -const VRegister& CPURegister::S() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetSRegFromCode(code_); -} - - -const VRegister& CPURegister::D() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetDRegFromCode(code_); -} - - -const VRegister& CPURegister::Q() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetQRegFromCode(code_); -} - - -const VRegister& CPURegister::V() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetVRegFromCode(code_); -} - - // Operand. Operand::Operand(int64_t immediate) : immediate_(immediate), @@ -296,6 +152,12 @@ Operand::Operand(int64_t immediate) extend_(NO_EXTEND), shift_amount_(0) {} +Operand::Operand(IntegerOperand immediate) + : immediate_(immediate.AsIntN(64)), + reg_(NoReg), + shift_(NO_SHIFT), + extend_(NO_EXTEND), + shift_amount_(0) {} Operand::Operand(Register reg, Shift shift, unsigned shift_amount) : reg_(reg), @@ -471,6 +333,24 @@ MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode) } +bool MemOperand::IsPlainRegister() const { + return IsImmediateOffset() && (GetOffset() == 0); +} + + +bool MemOperand::IsEquivalentToPlainRegister() const { + if (regoffset_.Is(NoReg)) { + // Immediate offset, pre-index or post-index. + return GetOffset() == 0; + } else if (GetRegisterOffset().IsZero()) { + // Zero register offset, pre-index or post-index. + // We can ignore shift and extend options because they all result in zero. + return true; + } + return false; +} + + bool MemOperand::IsImmediateOffset() const { return (addrmode_ == Offset) && regoffset_.Is(NoReg); } @@ -493,6 +373,62 @@ void MemOperand::AddOffset(int64_t offset) { } +bool SVEMemOperand::IsValid() const { +#ifdef VIXL_DEBUG + { + // It should not be possible for an SVEMemOperand to match multiple types. + int count = 0; + if (IsScalarPlusImmediate()) count++; + if (IsScalarPlusScalar()) count++; + if (IsScalarPlusVector()) count++; + if (IsVectorPlusImmediate()) count++; + if (IsVectorPlusVector()) count++; + VIXL_ASSERT(count <= 1); + } +#endif + + // We can't have a register _and_ an immediate offset. + if ((offset_ != 0) && (!regoffset_.IsNone())) return false; + + if (shift_amount_ != 0) { + // Only shift and extend modifiers can take a shift amount. + switch (mod_) { + case NO_SVE_OFFSET_MODIFIER: + case SVE_MUL_VL: + return false; + case SVE_LSL: + case SVE_UXTW: + case SVE_SXTW: + // Fall through. 
+ break; + } + } + + return IsScalarPlusImmediate() || IsScalarPlusScalar() || + IsScalarPlusVector() || IsVectorPlusImmediate() || + IsVectorPlusVector(); +} + + +bool SVEMemOperand::IsEquivalentToScalar() const { + if (IsScalarPlusImmediate()) { + return GetImmediateOffset() == 0; + } + if (IsScalarPlusScalar()) { + // We can ignore the shift because it will still result in zero. + return GetScalarOffset().IsZero(); + } + // Forms involving vectors are never equivalent to a single scalar. + return false; +} + +bool SVEMemOperand::IsPlainRegister() const { + if (IsScalarPlusImmediate()) { + return GetImmediateOffset() == 0; + } + return false; +} + GenericOperand::GenericOperand(const CPURegister& reg) : cpu_register_(reg), mem_op_size_(0) { if (reg.IsQ()) { diff --git a/src/aarch64/operands-aarch64.h b/src/aarch64/operands-aarch64.h index bfc6b702..ad03a9ee 100644 --- a/src/aarch64/operands-aarch64.h +++ b/src/aarch64/operands-aarch64.h @@ -27,525 +27,15 @@ #ifndef VIXL_AARCH64_OPERANDS_AARCH64_H_ #define VIXL_AARCH64_OPERANDS_AARCH64_H_ +#include <sstream> +#include <string> + #include "instructions-aarch64.h" +#include "registers-aarch64.h" namespace vixl { namespace aarch64 { -typedef uint64_t RegList; -static const int kRegListSizeInBits = sizeof(RegList) * 8; - - -// Registers. - -// Some CPURegister methods can return Register or VRegister types, so we need -// to declare them in advance. -class Register; -class VRegister; - -class CPURegister { - public: - enum RegisterType { - // The kInvalid value is used to detect uninitialized static instances, - // which are always zero-initialized before any constructors are called. - kInvalid = 0, - kRegister, - kVRegister, - kNoRegister - }; - - CPURegister() : code_(0), size_(0), type_(kNoRegister) { - VIXL_ASSERT(!IsValid()); - VIXL_ASSERT(IsNone()); - } - - CPURegister(unsigned code, unsigned size, RegisterType type) - : code_(code), size_(size), type_(type) { - VIXL_ASSERT(IsValidOrNone()); - } - - unsigned GetCode() const { - VIXL_ASSERT(IsValid()); - return code_; - } - VIXL_DEPRECATED("GetCode", unsigned code() const) { return GetCode(); } - - RegisterType GetType() const { - VIXL_ASSERT(IsValidOrNone()); - return type_; - } - VIXL_DEPRECATED("GetType", RegisterType type() const) { return GetType(); } - - RegList GetBit() const { - VIXL_ASSERT(code_ < (sizeof(RegList) * 8)); - return IsValid() ? 
(static_cast<RegList>(1) << code_) : 0; - } - VIXL_DEPRECATED("GetBit", RegList Bit() const) { return GetBit(); } - - int GetSizeInBytes() const { - VIXL_ASSERT(IsValid()); - VIXL_ASSERT(size_ % 8 == 0); - return size_ / 8; - } - VIXL_DEPRECATED("GetSizeInBytes", int SizeInBytes() const) { - return GetSizeInBytes(); - } - - int GetSizeInBits() const { - VIXL_ASSERT(IsValid()); - return size_; - } - VIXL_DEPRECATED("GetSizeInBits", unsigned size() const) { - return GetSizeInBits(); - } - VIXL_DEPRECATED("GetSizeInBits", int SizeInBits() const) { - return GetSizeInBits(); - } - - bool Is8Bits() const { - VIXL_ASSERT(IsValid()); - return size_ == 8; - } - - bool Is16Bits() const { - VIXL_ASSERT(IsValid()); - return size_ == 16; - } - - bool Is32Bits() const { - VIXL_ASSERT(IsValid()); - return size_ == 32; - } - - bool Is64Bits() const { - VIXL_ASSERT(IsValid()); - return size_ == 64; - } - - bool Is128Bits() const { - VIXL_ASSERT(IsValid()); - return size_ == 128; - } - - bool IsValid() const { - if (IsValidRegister() || IsValidVRegister()) { - VIXL_ASSERT(!IsNone()); - return true; - } else { - // This assert is hit when the register has not been properly initialized. - // One cause for this can be an initialisation order fiasco. See - // https://isocpp.org/wiki/faq/ctors#static-init-order for some details. - VIXL_ASSERT(IsNone()); - return false; - } - } - - bool IsValidRegister() const { - return IsRegister() && ((size_ == kWRegSize) || (size_ == kXRegSize)) && - ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)); - } - - bool IsValidVRegister() const { - return IsVRegister() && ((size_ == kBRegSize) || (size_ == kHRegSize) || - (size_ == kSRegSize) || (size_ == kDRegSize) || - (size_ == kQRegSize)) && - (code_ < kNumberOfVRegisters); - } - - bool IsValidFPRegister() const { - return IsValidVRegister() && IsFPRegister(); - } - - bool IsNone() const { - // kNoRegister types should always have size 0 and code 0. - VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0)); - VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0)); - - return type_ == kNoRegister; - } - - bool Aliases(const CPURegister& other) const { - VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone()); - return (code_ == other.code_) && (type_ == other.type_); - } - - bool Is(const CPURegister& other) const { - VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone()); - return Aliases(other) && (size_ == other.size_); - } - - bool IsZero() const { - VIXL_ASSERT(IsValid()); - return IsRegister() && (code_ == kZeroRegCode); - } - - bool IsSP() const { - VIXL_ASSERT(IsValid()); - return IsRegister() && (code_ == kSPRegInternalCode); - } - - bool IsRegister() const { return type_ == kRegister; } - - bool IsVRegister() const { return type_ == kVRegister; } - - // CPURegister does not track lanes like VRegister does, so we have to assume - // that we have scalar types here. - // TODO: Encode lane information in CPURegister so that we can be consistent. - bool IsFPRegister() const { return IsH() || IsS() || IsD(); } - - bool IsW() const { return IsValidRegister() && Is32Bits(); } - bool IsX() const { return IsValidRegister() && Is64Bits(); } - - // These assertions ensure that the size and type of the register are as - // described. They do not consider the number of lanes that make up a vector. - // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD() - // does not imply Is1D() or Is8B(). - // Check the number of lanes, ie. the format of the vector, using methods such - // as Is8B(), Is1D(), etc. 
in the VRegister class. - bool IsV() const { return IsVRegister(); } - bool IsB() const { return IsV() && Is8Bits(); } - bool IsH() const { return IsV() && Is16Bits(); } - bool IsS() const { return IsV() && Is32Bits(); } - bool IsD() const { return IsV() && Is64Bits(); } - bool IsQ() const { return IsV() && Is128Bits(); } - - // Semantic type for sdot and udot instructions. - bool IsS4B() const { return IsS(); } - const VRegister& S4B() const { return S(); } - - const Register& W() const; - const Register& X() const; - const VRegister& V() const; - const VRegister& B() const; - const VRegister& H() const; - const VRegister& S() const; - const VRegister& D() const; - const VRegister& Q() const; - - bool IsSameType(const CPURegister& other) const { - return type_ == other.type_; - } - - bool IsSameSizeAndType(const CPURegister& other) const { - return (size_ == other.size_) && IsSameType(other); - } - - protected: - unsigned code_; - int size_; - RegisterType type_; - - private: - bool IsValidOrNone() const { return IsValid() || IsNone(); } -}; - - -class Register : public CPURegister { - public: - Register() : CPURegister() {} - explicit Register(const CPURegister& other) - : CPURegister(other.GetCode(), other.GetSizeInBits(), other.GetType()) { - VIXL_ASSERT(IsValidRegister()); - } - Register(unsigned code, unsigned size) : CPURegister(code, size, kRegister) {} - - bool IsValid() const { - VIXL_ASSERT(IsRegister() || IsNone()); - return IsValidRegister(); - } - - static const Register& GetWRegFromCode(unsigned code); - VIXL_DEPRECATED("GetWRegFromCode", - static const Register& WRegFromCode(unsigned code)) { - return GetWRegFromCode(code); - } - - static const Register& GetXRegFromCode(unsigned code); - VIXL_DEPRECATED("GetXRegFromCode", - static const Register& XRegFromCode(unsigned code)) { - return GetXRegFromCode(code); - } - - private: - static const Register wregisters[]; - static const Register xregisters[]; -}; - - -namespace internal { - -template <int size_in_bits> -class FixedSizeRegister : public Register { - public: - FixedSizeRegister() : Register() {} - explicit FixedSizeRegister(unsigned code) : Register(code, size_in_bits) { - VIXL_ASSERT(IsValidRegister()); - } - explicit FixedSizeRegister(const Register& other) - : Register(other.GetCode(), size_in_bits) { - VIXL_ASSERT(other.GetSizeInBits() == size_in_bits); - VIXL_ASSERT(IsValidRegister()); - } - explicit FixedSizeRegister(const CPURegister& other) - : Register(other.GetCode(), other.GetSizeInBits()) { - VIXL_ASSERT(other.GetType() == kRegister); - VIXL_ASSERT(other.GetSizeInBits() == size_in_bits); - VIXL_ASSERT(IsValidRegister()); - } - - bool IsValid() const { - return Register::IsValid() && (GetSizeInBits() == size_in_bits); - } -}; - -} // namespace internal - -typedef internal::FixedSizeRegister<kXRegSize> XRegister; -typedef internal::FixedSizeRegister<kWRegSize> WRegister; - - -class VRegister : public CPURegister { - public: - VRegister() : CPURegister(), lanes_(1) {} - explicit VRegister(const CPURegister& other) - : CPURegister(other.GetCode(), other.GetSizeInBits(), other.GetType()), - lanes_(1) { - VIXL_ASSERT(IsValidVRegister()); - VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); - } - VRegister(unsigned code, unsigned size, unsigned lanes = 1) - : CPURegister(code, size, kVRegister), lanes_(lanes) { - VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); - } - VRegister(unsigned code, VectorFormat format) - : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister), - 
lanes_(IsVectorFormat(format) ? LaneCountFromFormat(format) : 1) { - VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); - } - - bool IsValid() const { - VIXL_ASSERT(IsVRegister() || IsNone()); - return IsValidVRegister(); - } - - static const VRegister& GetBRegFromCode(unsigned code); - VIXL_DEPRECATED("GetBRegFromCode", - static const VRegister& BRegFromCode(unsigned code)) { - return GetBRegFromCode(code); - } - - static const VRegister& GetHRegFromCode(unsigned code); - VIXL_DEPRECATED("GetHRegFromCode", - static const VRegister& HRegFromCode(unsigned code)) { - return GetHRegFromCode(code); - } - - static const VRegister& GetSRegFromCode(unsigned code); - VIXL_DEPRECATED("GetSRegFromCode", - static const VRegister& SRegFromCode(unsigned code)) { - return GetSRegFromCode(code); - } - - static const VRegister& GetDRegFromCode(unsigned code); - VIXL_DEPRECATED("GetDRegFromCode", - static const VRegister& DRegFromCode(unsigned code)) { - return GetDRegFromCode(code); - } - - static const VRegister& GetQRegFromCode(unsigned code); - VIXL_DEPRECATED("GetQRegFromCode", - static const VRegister& QRegFromCode(unsigned code)) { - return GetQRegFromCode(code); - } - - static const VRegister& GetVRegFromCode(unsigned code); - VIXL_DEPRECATED("GetVRegFromCode", - static const VRegister& VRegFromCode(unsigned code)) { - return GetVRegFromCode(code); - } - - VRegister V8B() const { return VRegister(code_, kDRegSize, 8); } - VRegister V16B() const { return VRegister(code_, kQRegSize, 16); } - VRegister V2H() const { return VRegister(code_, kSRegSize, 2); } - VRegister V4H() const { return VRegister(code_, kDRegSize, 4); } - VRegister V8H() const { return VRegister(code_, kQRegSize, 8); } - VRegister V2S() const { return VRegister(code_, kDRegSize, 2); } - VRegister V4S() const { return VRegister(code_, kQRegSize, 4); } - VRegister V2D() const { return VRegister(code_, kQRegSize, 2); } - VRegister V1D() const { return VRegister(code_, kDRegSize, 1); } - - bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); } - bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); } - bool Is2H() const { return (Is32Bits() && (lanes_ == 2)); } - bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); } - bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); } - bool Is1S() const { return (Is32Bits() && (lanes_ == 1)); } - bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); } - bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); } - bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); } - bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); } - - // For consistency, we assert the number of lanes of these scalar registers, - // even though there are no vectors of equivalent total size with which they - // could alias. - bool Is1B() const { - VIXL_ASSERT(!(Is8Bits() && IsVector())); - return Is8Bits(); - } - bool Is1H() const { - VIXL_ASSERT(!(Is16Bits() && IsVector())); - return Is16Bits(); - } - - // Semantic type for sdot and udot instructions. 
- bool Is1S4B() const { return Is1S(); } - - - bool IsLaneSizeB() const { return GetLaneSizeInBits() == kBRegSize; } - bool IsLaneSizeH() const { return GetLaneSizeInBits() == kHRegSize; } - bool IsLaneSizeS() const { return GetLaneSizeInBits() == kSRegSize; } - bool IsLaneSizeD() const { return GetLaneSizeInBits() == kDRegSize; } - - int GetLanes() const { return lanes_; } - VIXL_DEPRECATED("GetLanes", int lanes() const) { return GetLanes(); } - - bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); } - bool IsValidFPRegister() const { - return IsValidVRegister() && IsFPRegister(); - } - - bool IsScalar() const { return lanes_ == 1; } - - bool IsVector() const { return lanes_ > 1; } - - bool IsSameFormat(const VRegister& other) const { - return (size_ == other.size_) && (lanes_ == other.lanes_); - } - - unsigned GetLaneSizeInBytes() const { return GetSizeInBytes() / lanes_; } - VIXL_DEPRECATED("GetLaneSizeInBytes", unsigned LaneSizeInBytes() const) { - return GetLaneSizeInBytes(); - } - - unsigned GetLaneSizeInBits() const { return GetLaneSizeInBytes() * 8; } - VIXL_DEPRECATED("GetLaneSizeInBits", unsigned LaneSizeInBits() const) { - return GetLaneSizeInBits(); - } - - private: - static const VRegister bregisters[]; - static const VRegister hregisters[]; - static const VRegister sregisters[]; - static const VRegister dregisters[]; - static const VRegister qregisters[]; - static const VRegister vregisters[]; - int lanes_; -}; - - -// No*Reg is used to indicate an unused argument, or an error case. Note that -// these all compare equal (using the Is() method). The Register and VRegister -// variants are provided for convenience. -const Register NoReg; -const VRegister NoVReg; -const CPURegister NoCPUReg; - - -#define DEFINE_REGISTERS(N) \ - const WRegister w##N(N); \ - const XRegister x##N(N); -AARCH64_REGISTER_CODE_LIST(DEFINE_REGISTERS) -#undef DEFINE_REGISTERS -const WRegister wsp(kSPRegInternalCode); -const XRegister sp(kSPRegInternalCode); - - -#define DEFINE_VREGISTERS(N) \ - const VRegister b##N(N, kBRegSize); \ - const VRegister h##N(N, kHRegSize); \ - const VRegister s##N(N, kSRegSize); \ - const VRegister d##N(N, kDRegSize); \ - const VRegister q##N(N, kQRegSize); \ - const VRegister v##N(N, kQRegSize); -AARCH64_REGISTER_CODE_LIST(DEFINE_VREGISTERS) -#undef DEFINE_VREGISTERS - - -// Register aliases. -const XRegister ip0 = x16; -const XRegister ip1 = x17; -const XRegister lr = x30; -const XRegister xzr = x31; -const WRegister wzr = w31; - - -// AreAliased returns true if any of the named registers overlap. Arguments -// set to NoReg are ignored. The system stack pointer may be specified. -bool AreAliased(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoReg, - const CPURegister& reg4 = NoReg, - const CPURegister& reg5 = NoReg, - const CPURegister& reg6 = NoReg, - const CPURegister& reg7 = NoReg, - const CPURegister& reg8 = NoReg); - - -// AreSameSizeAndType returns true if all of the specified registers have the -// same size, and are of the same type. The system stack pointer may be -// specified. Arguments set to NoReg are ignored, as are any subsequent -// arguments. At least one argument (reg1) must be valid (not NoCPUReg). 
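Given the new #include "registers-aarch64.h" at the top of this header, the register classes and predicates deleted here presumably move there rather than being dropped. For reference, a sketch of the predicate semantics described above, using the usual VIXL register aliases:

    VIXL_ASSERT(AreAliased(x0, w0));          // same code, same bank
    VIXL_ASSERT(!AreAliased(x0, x1, NoReg));  // NoReg arguments are ignored
    VIXL_ASSERT(AreSameSizeAndType(x0, x1));
    VIXL_ASSERT(!AreSameSizeAndType(x0, w1)); // sizes differ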
-bool AreSameSizeAndType(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoCPUReg, - const CPURegister& reg4 = NoCPUReg, - const CPURegister& reg5 = NoCPUReg, - const CPURegister& reg6 = NoCPUReg, - const CPURegister& reg7 = NoCPUReg, - const CPURegister& reg8 = NoCPUReg); - -// AreEven returns true if all of the specified registers have even register -// indices. Arguments set to NoReg are ignored, as are any subsequent -// arguments. At least one argument (reg1) must be valid (not NoCPUReg). -bool AreEven(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoReg, - const CPURegister& reg4 = NoReg, - const CPURegister& reg5 = NoReg, - const CPURegister& reg6 = NoReg, - const CPURegister& reg7 = NoReg, - const CPURegister& reg8 = NoReg); - - -// AreConsecutive returns true if all of the specified registers are -// consecutive in the register file. Arguments set to NoReg are ignored, as are -// any subsequent arguments. At least one argument (reg1) must be valid -// (not NoCPUReg). -bool AreConsecutive(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoCPUReg, - const CPURegister& reg4 = NoCPUReg); - - -// AreSameFormat returns true if all of the specified VRegisters have the same -// vector format. Arguments set to NoReg are ignored, as are any subsequent -// arguments. At least one argument (reg1) must be valid (not NoVReg). -bool AreSameFormat(const VRegister& reg1, - const VRegister& reg2, - const VRegister& reg3 = NoVReg, - const VRegister& reg4 = NoVReg); - - -// AreConsecutive returns true if all of the specified VRegisters are -// consecutive in the register file. Arguments set to NoReg are ignored, as are -// any subsequent arguments. At least one argument (reg1) must be valid -// (not NoVReg). -bool AreConsecutive(const VRegister& reg1, - const VRegister& reg2, - const VRegister& reg3 = NoVReg, - const VRegister& reg4 = NoVReg); - - // Lists of registers. class CPURegList { public: @@ -580,6 +70,28 @@ class CPURegList { VIXL_ASSERT(IsValid()); } + // Construct an empty CPURegList with the specified size and type. If `size` + // is CPURegister::kUnknownSize and the register type requires a size, a valid + // but unspecified default will be picked. + static CPURegList Empty(CPURegister::RegisterType type, + unsigned size = CPURegister::kUnknownSize) { + return CPURegList(type, GetDefaultSizeFor(type, size), 0); + } + + // Construct a CPURegList with all possible registers with the specified size + // and type. If `size` is CPURegister::kUnknownSize and the register type + // requires a size, a valid but unspecified default will be picked. + static CPURegList All(CPURegister::RegisterType type, + unsigned size = CPURegister::kUnknownSize) { + unsigned number_of_registers = (CPURegister::GetMaxCodeFor(type) + 1); + RegList list = (static_cast<RegList>(1) << number_of_registers) - 1; + if (type == CPURegister::kRegister) { + // GetMaxCodeFor(kRegister) ignores SP, so explicitly include it. + list |= (static_cast<RegList>(1) << kSPRegInternalCode); + } + return CPURegList(type, GetDefaultSizeFor(type, size), list); + } + CPURegister::RegisterType GetType() const { VIXL_ASSERT(IsValid()); return type_; @@ -588,6 +100,10 @@ class CPURegList { return GetType(); } + CPURegister::RegisterBank GetBank() const { + return CPURegister::GetBankFor(GetType()); + } + // Combine another CPURegList into this one. Registers that already exist in // this list are left unchanged. 
The type and size of the registers in the // 'other' list must match those in this list. @@ -684,8 +200,11 @@ class CPURegList { // preparing registers for an AAPCS64 function call, for example. void RemoveCalleeSaved(); - CPURegister PopLowestIndex(); - CPURegister PopHighestIndex(); + // Find the register in this list that appears in `mask` with the lowest or + // highest code, remove it from the list and return it as a CPURegister. If + // the list is empty, leave it unchanged and return NoCPUReg. + CPURegister PopLowestIndex(RegList mask = ~static_cast<RegList>(0)); + CPURegister PopHighestIndex(RegList mask = ~static_cast<RegList>(0)); // AAPCS64 callee-saved registers. static CPURegList GetCalleeSaved(unsigned size = kXRegSize); @@ -704,7 +223,7 @@ class CPURegList { bool IncludesAliasOf(const CPURegister& other) const { VIXL_ASSERT(IsValid()); - return (type_ == other.GetType()) && IncludesAliasOf(other.GetCode()); + return (GetBank() == other.GetBank()) && IncludesAliasOf(other.GetCode()); } bool IncludesAliasOf(int code) const { @@ -744,6 +263,21 @@ class CPURegList { } private: + // If `size` is CPURegister::kUnknownSize and the type requires a known size, + // then return an arbitrary-but-valid size. + // + // Otherwise, the size is checked for validity and returned unchanged. + static unsigned GetDefaultSizeFor(CPURegister::RegisterType type, + unsigned size) { + if (size == CPURegister::kUnknownSize) { + if (type == CPURegister::kRegister) size = kXRegSize; + if (type == CPURegister::kVRegister) size = kQRegSize; + // All other types require kUnknownSize. + } + VIXL_ASSERT(CPURegister(0, size, type).IsValid()); + return size; + } + RegList list_; int size_; CPURegister::RegisterType type_; @@ -761,6 +295,7 @@ extern const CPURegList kCalleeSavedV; extern const CPURegList kCallerSaved; extern const CPURegList kCallerSavedV; +class IntegerOperand; // Operand. class Operand { @@ -769,7 +304,9 @@ class Operand { // where <immediate> is int64_t. // This is allowed to be an implicit constructor because Operand is // a wrapper class that doesn't normally perform any type conversion. - Operand(int64_t immediate = 0); // NOLINT(runtime/explicit) + Operand(int64_t immediate); // NOLINT(runtime/explicit) + + Operand(IntegerOperand immediate); // NOLINT(runtime/explicit) // rm, {<shift> #<shift_amount>} // where <shift> is one of {LSL, LSR, ASR, ROR}. @@ -883,6 +420,16 @@ class MemOperand { return shift_amount_; } + // True for MemOperands which represent something like [x0]. + // Currently, this will also return true for [x0, #0], because MemOperand has + // no way to distinguish the two. + bool IsPlainRegister() const; + + // True for MemOperands which represent something like [x0], or for compound + // MemOperands which are functionally equivalent, such as [x0, #0], [x0, xzr] + // or [x0, wzr, UXTW #3]. + bool IsEquivalentToPlainRegister() const; + // True for immediate-offset (but not indexed) MemOperands. bool IsImmediateOffset() const; // True for register-offset (but not indexed) MemOperands. @@ -918,6 +465,448 @@ class MemOperand { unsigned shift_amount_; }; +// SVE supports memory operands which don't make sense to the core ISA, such as +// scatter-gather forms, in which either the base or offset registers are +// vectors. This class exists to avoid complicating core-ISA code with +// SVE-specific behaviour. +// +// Note that SVE does not support any pre- or post-index modes. 
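The class that follows provides one constructor per addressing form. A construction sketch covering all five forms, assuming hypothetical registers:

    SVEMemOperand vpi(z0.VnS(), 21);            // vector-plus-immediate: [z0.s, #21]
    SVEMemOperand spi(x0, 42, SVE_MUL_VL);      // scalar-plus-immediate: [x0, #42, MUL VL]
    SVEMemOperand sps(x0, x1);                  // scalar-plus-scalar:    [x0, x1]
    SVEMemOperand spv(x0, z1.VnD(), SVE_UXTW);  // scalar-plus-vector:    [x0, z1.d, UXTW]
    SVEMemOperand vpv(z0.VnD(), z1.VnD());      // vector-plus-vector:    [z0.d, z1.d]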
+class SVEMemOperand { + public: + // "vector-plus-immediate", like [z0.s, #21] + explicit SVEMemOperand(ZRegister base, uint64_t offset = 0) + : base_(base), + regoffset_(NoReg), + offset_(RawbitsToInt64(offset)), + mod_(NO_SVE_OFFSET_MODIFIER), + shift_amount_(0) { + VIXL_ASSERT(IsVectorPlusImmediate()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-immediate", like [x0], [x0, #42] or [x0, #42, MUL_VL] + // The only supported modifiers are NO_SVE_OFFSET_MODIFIER or SVE_MUL_VL. + // + // Note that VIXL cannot currently distinguish between `SVEMemOperand(x0)` and + // `SVEMemOperand(x0, 0)`. This is only significant in scalar-plus-scalar + // instructions where xm defaults to xzr. However, users should not rely on + // `SVEMemOperand(x0, 0)` being accepted in such cases. + explicit SVEMemOperand(Register base, + uint64_t offset = 0, + SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER) + : base_(base), + regoffset_(NoReg), + offset_(RawbitsToInt64(offset)), + mod_(mod), + shift_amount_(0) { + VIXL_ASSERT(IsScalarPlusImmediate()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-scalar", like [x0, x1] + // "scalar-plus-vector", like [x0, z1.d] + SVEMemOperand(Register base, CPURegister offset) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(NO_SVE_OFFSET_MODIFIER), + shift_amount_(0) { + VIXL_ASSERT(IsScalarPlusScalar() || IsScalarPlusVector()); + if (offset.IsZero()) VIXL_ASSERT(IsEquivalentToScalar()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-vector", like [x0, z1.d, UXTW] + // The type of `mod` can be any `SVEOffsetModifier` (other than LSL), or a + // corresponding `Extend` value. + template <typename M> + SVEMemOperand(Register base, ZRegister offset, M mod) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(GetSVEOffsetModifierFor(mod)), + shift_amount_(0) { + VIXL_ASSERT(mod_ != SVE_LSL); // LSL requires an explicit shift amount. + VIXL_ASSERT(IsScalarPlusVector()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-scalar", like [x0, x1, LSL #1] + // "scalar-plus-vector", like [x0, z1.d, LSL #2] + // The type of `mod` can be any `SVEOffsetModifier`, or a corresponding + // `Shift` or `Extend` value. + template <typename M> + SVEMemOperand(Register base, CPURegister offset, M mod, unsigned shift_amount) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(GetSVEOffsetModifierFor(mod)), + shift_amount_(shift_amount) { + VIXL_ASSERT(IsValid()); + } + + // "vector-plus-vector", like [z0.d, z1.d, UXTW] + template <typename M = SVEOffsetModifier> + SVEMemOperand(ZRegister base, + ZRegister offset, + M mod = NO_SVE_OFFSET_MODIFIER, + unsigned shift_amount = 0) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(GetSVEOffsetModifierFor(mod)), + shift_amount_(shift_amount) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(IsVectorPlusVector()); + } + + // True for SVEMemOperands which represent something like [x0]. + // This will also return true for [x0, #0], because there is no way + // to distinguish the two. + bool IsPlainScalar() const { + return IsScalarPlusImmediate() && (offset_ == 0); + } + + // True for SVEMemOperands which represent something like [x0], or for + // compound SVEMemOperands which are functionally equivalent, such as + // [x0, #0], [x0, xzr] or [x0, wzr, UXTW #3]. + bool IsEquivalentToScalar() const; + + // True for SVEMemOperands like [x0], [x0, #0], false for [x0, xzr] and + // similar. 
+  bool IsPlainRegister() const;
+
+  bool IsScalarPlusImmediate() const {
+    return base_.IsX() && regoffset_.IsNone() &&
+           ((mod_ == NO_SVE_OFFSET_MODIFIER) || IsMulVl());
+  }
+
+  bool IsScalarPlusScalar() const {
+    // SVE offers no extend modes for scalar-plus-scalar, so both registers must
+    // be X registers.
+    return base_.IsX() && regoffset_.IsX() &&
+           ((mod_ == NO_SVE_OFFSET_MODIFIER) || (mod_ == SVE_LSL));
+  }
+
+  bool IsScalarPlusVector() const {
+    // The modifier can be LSL or an extend mode (UXTW or SXTW) here. Unlike
+    // in the core ISA, these extend modes do not imply an S-sized lane, so the
+    // modifier is independent of the lane size. The architecture describes
+    // [US]XTW with a D-sized lane as an "unpacked" offset.
+    return base_.IsX() && regoffset_.IsZRegister() &&
+           (regoffset_.IsLaneSizeS() || regoffset_.IsLaneSizeD()) && !IsMulVl();
+  }
+
+  bool IsVectorPlusImmediate() const {
+    return base_.IsZRegister() &&
+           (base_.IsLaneSizeS() || base_.IsLaneSizeD()) &&
+           regoffset_.IsNone() && (mod_ == NO_SVE_OFFSET_MODIFIER);
+  }
+
+  bool IsVectorPlusVector() const {
+    return base_.IsZRegister() && regoffset_.IsZRegister() && (offset_ == 0) &&
+           AreSameFormat(base_, regoffset_) &&
+           (base_.IsLaneSizeS() || base_.IsLaneSizeD());
+  }
+
+  bool IsContiguous() const { return !IsScatterGather(); }
+  bool IsScatterGather() const {
+    return base_.IsZRegister() || regoffset_.IsZRegister();
+  }
+
+  // TODO: If necessary, add helpers like `HasScalarBase()`.
+
+  Register GetScalarBase() const {
+    VIXL_ASSERT(base_.IsX());
+    return Register(base_);
+  }
+
+  ZRegister GetVectorBase() const {
+    VIXL_ASSERT(base_.IsZRegister());
+    VIXL_ASSERT(base_.HasLaneSize());
+    return ZRegister(base_);
+  }
+
+  Register GetScalarOffset() const {
+    VIXL_ASSERT(regoffset_.IsRegister());
+    return Register(regoffset_);
+  }
+
+  ZRegister GetVectorOffset() const {
+    VIXL_ASSERT(regoffset_.IsZRegister());
+    VIXL_ASSERT(regoffset_.HasLaneSize());
+    return ZRegister(regoffset_);
+  }
+
+  int64_t GetImmediateOffset() const {
+    VIXL_ASSERT(regoffset_.IsNone());
+    return offset_;
+  }
+
+  SVEOffsetModifier GetOffsetModifier() const { return mod_; }
+  unsigned GetShiftAmount() const { return shift_amount_; }
+
+  bool IsEquivalentToLSL(unsigned amount) const {
+    if (shift_amount_ != amount) return false;
+    if (amount == 0) {
+      // No-shift is equivalent to "LSL #0".
+      return ((mod_ == SVE_LSL) || (mod_ == NO_SVE_OFFSET_MODIFIER));
+    }
+    return mod_ == SVE_LSL;
+  }
+
+  bool IsMulVl() const { return mod_ == SVE_MUL_VL; }
+
+  bool IsValid() const;
+
+ private:
+  // Allow standard `Shift` and `Extend` arguments to be used.
+  SVEOffsetModifier GetSVEOffsetModifierFor(Shift shift) {
+    if (shift == LSL) return SVE_LSL;
+    if (shift == NO_SHIFT) return NO_SVE_OFFSET_MODIFIER;
+    // SVE does not accept any other shift.
+    VIXL_UNIMPLEMENTED();
+    return NO_SVE_OFFSET_MODIFIER;
+  }
+
+  SVEOffsetModifier GetSVEOffsetModifierFor(Extend extend = NO_EXTEND) {
+    if (extend == UXTW) return SVE_UXTW;
+    if (extend == SXTW) return SVE_SXTW;
+    if (extend == NO_EXTEND) return NO_SVE_OFFSET_MODIFIER;
+    // SVE does not accept any other extend mode.
+    VIXL_UNIMPLEMENTED();
+    return NO_SVE_OFFSET_MODIFIER;
+  }
+
+  SVEOffsetModifier GetSVEOffsetModifierFor(SVEOffsetModifier mod) {
+    return mod;
+  }
+
+  CPURegister base_;
+  CPURegister regoffset_;
+  int64_t offset_;
+  SVEOffsetModifier mod_;
+  unsigned shift_amount_;
+};
+
+// Represent a signed or unsigned integer operand.
+// +// This is designed to make instructions which naturally accept a _signed_ +// immediate easier to implement and use, when we also want users to be able to +// specify raw-bits values (such as with hexadecimal constants). The advantage +// of this class over a simple uint64_t (with implicit C++ sign-extension) is +// that this class can strictly check the range of allowed values. With a simple +// uint64_t, it is impossible to distinguish -1 from UINT64_MAX. +// +// For example, these instructions are equivalent: +// +// __ Insr(z0.VnB(), -1); +// __ Insr(z0.VnB(), 0xff); +// +// ... as are these: +// +// __ Insr(z0.VnD(), -1); +// __ Insr(z0.VnD(), 0xffffffffffffffff); +// +// ... but this is invalid: +// +// __ Insr(z0.VnB(), 0xffffffffffffffff); // Too big for B-sized lanes. +class IntegerOperand { + public: +#define VIXL_INT_TYPES(V) \ + V(char) V(short) V(int) V(long) V(long long) // NOLINT(runtime/int) +#define VIXL_DECL_INT_OVERLOADS(T) \ + /* These are allowed to be implicit constructors because this is a */ \ + /* wrapper class that doesn't normally perform any type conversion. */ \ + IntegerOperand(signed T immediate) /* NOLINT(runtime/explicit) */ \ + : raw_bits_(immediate), /* Allow implicit sign-extension. */ \ + is_negative_(immediate < 0) {} \ + IntegerOperand(unsigned T immediate) /* NOLINT(runtime/explicit) */ \ + : raw_bits_(immediate), is_negative_(false) {} + VIXL_INT_TYPES(VIXL_DECL_INT_OVERLOADS) +#undef VIXL_DECL_INT_OVERLOADS +#undef VIXL_INT_TYPES + + // TODO: `Operand` can currently only hold an int64_t, so some large, unsigned + // values will be misrepresented here. + explicit IntegerOperand(const Operand& operand) + : raw_bits_(operand.GetEquivalentImmediate()), + is_negative_(operand.GetEquivalentImmediate() < 0) {} + + bool IsIntN(unsigned n) const { + return is_negative_ ? vixl::IsIntN(n, RawbitsToInt64(raw_bits_)) + : vixl::IsIntN(n, raw_bits_); + } + bool IsUintN(unsigned n) const { + return !is_negative_ && vixl::IsUintN(n, raw_bits_); + } + + bool IsUint8() const { return IsUintN(8); } + bool IsUint16() const { return IsUintN(16); } + bool IsUint32() const { return IsUintN(32); } + bool IsUint64() const { return IsUintN(64); } + + bool IsInt8() const { return IsIntN(8); } + bool IsInt16() const { return IsIntN(16); } + bool IsInt32() const { return IsIntN(32); } + bool IsInt64() const { return IsIntN(64); } + + bool FitsInBits(unsigned n) const { + return is_negative_ ? IsIntN(n) : IsUintN(n); + } + bool FitsInLane(const CPURegister& zd) const { + return FitsInBits(zd.GetLaneSizeInBits()); + } + bool FitsInSignedLane(const CPURegister& zd) const { + return IsIntN(zd.GetLaneSizeInBits()); + } + bool FitsInUnsignedLane(const CPURegister& zd) const { + return IsUintN(zd.GetLaneSizeInBits()); + } + + // Cast a value in the range [INT<n>_MIN, UINT<n>_MAX] to an unsigned integer + // in the range [0, UINT<n>_MAX] (using two's complement mapping). + uint64_t AsUintN(unsigned n) const { + VIXL_ASSERT(FitsInBits(n)); + return raw_bits_ & GetUintMask(n); + } + + uint8_t AsUint8() const { return static_cast<uint8_t>(AsUintN(8)); } + uint16_t AsUint16() const { return static_cast<uint16_t>(AsUintN(16)); } + uint32_t AsUint32() const { return static_cast<uint32_t>(AsUintN(32)); } + uint64_t AsUint64() const { return AsUintN(64); } + + // Cast a value in the range [INT<n>_MIN, UINT<n>_MAX] to a signed integer in + // the range [INT<n>_MIN, INT<n>_MAX] (using two's complement mapping). 
+  int64_t AsIntN(unsigned n) const {
+    VIXL_ASSERT(FitsInBits(n));
+    return ExtractSignedBitfield64(n - 1, 0, raw_bits_);
+  }
+
+  int8_t AsInt8() const { return static_cast<int8_t>(AsIntN(8)); }
+  int16_t AsInt16() const { return static_cast<int16_t>(AsIntN(16)); }
+  int32_t AsInt32() const { return static_cast<int32_t>(AsIntN(32)); }
+  int64_t AsInt64() const { return AsIntN(64); }
+
+  // Several instructions encode a signed int<N>_t, which is then (optionally)
+  // left-shifted and sign-extended to a Z register lane with a size which may
+  // be larger than N. This helper tries to find an int<N>_t such that the
+  // IntegerOperand's arithmetic value is reproduced in each lane.
+  //
+  // This is the mechanism that allows `Insr(z0.VnB(), 0xff)` to be treated as
+  // `Insr(z0.VnB(), -1)`.
+  template <unsigned N, unsigned kShift, typename T>
+  bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd, T* imm) const {
+    VIXL_STATIC_ASSERT(std::numeric_limits<T>::digits > N);
+    VIXL_ASSERT(FitsInLane(zd));
+    if ((raw_bits_ & GetUintMask(kShift)) != 0) return false;
+
+    // Reverse the specified left-shift.
+    IntegerOperand unshifted(*this);
+    unshifted.ArithmeticShiftRight(kShift);
+
+    if (unshifted.IsIntN(N)) {
+      // This is trivial, since sign-extension produces the same arithmetic
+      // value irrespective of the destination size.
+      *imm = static_cast<T>(unshifted.AsIntN(N));
+      return true;
+    }
+
+    // Otherwise, we might be able to use the sign-extension to produce the
+    // desired bit pattern. We can only do this for values in the range
+    // [INT<N>_MAX + 1, UINT<N>_MAX], where the highest set bit is the sign bit.
+    //
+    // The lane size has to be adjusted to compensate for `kShift`, since the
+    // high bits will be dropped when the encoded value is left-shifted.
+    if (unshifted.IsUintN(zd.GetLaneSizeInBits() - kShift)) {
+      int64_t encoded = unshifted.AsIntN(zd.GetLaneSizeInBits() - kShift);
+      if (vixl::IsIntN(N, encoded)) {
+        *imm = static_cast<T>(encoded);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // As above, but `kShift` is written to the `*shift` parameter on success, so
+  // that it is easy to chain calls like this:
+  //
+  //     if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
+  //         imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
+  //       insn(zd, imm8, shift);
+  //     }
+  template <unsigned N, unsigned kShift, typename T, typename S>
+  bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd,
+                                     T* imm,
+                                     S* shift) const {
+    if (TryEncodeAsShiftedIntNForLane<N, kShift>(zd, imm)) {
+      *shift = kShift;
+      return true;
+    }
+    return false;
+  }
+
+  // As above, but assume that `kShift` is 0.
+  template <unsigned N, typename T>
+  bool TryEncodeAsIntNForLane(const CPURegister& zd, T* imm) const {
+    return TryEncodeAsShiftedIntNForLane<N, 0>(zd, imm);
+  }
+
+  // As above, but for unsigned fields. This is usually a simple operation, but
+  // is provided for symmetry.
+  template <unsigned N, unsigned kShift, typename T>
+  bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd, T* imm) const {
+    VIXL_STATIC_ASSERT(std::numeric_limits<T>::digits > N);
+    VIXL_ASSERT(FitsInLane(zd));
+
+    // TODO: Should we convert -1 to 0xff here?
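+    // For now, negative values are rejected outright: an unsigned field has
+    // no sign bit, so the sign-extension trick used by the signed helpers
+    // above does not apply.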
+    if (is_negative_) return false;
+    USE(zd);
+
+    if ((raw_bits_ & GetUintMask(kShift)) != 0) return false;
+
+    if (vixl::IsUintN(N, raw_bits_ >> kShift)) {
+      *imm = static_cast<T>(raw_bits_ >> kShift);
+      return true;
+    }
+    return false;
+  }
+
+  template <unsigned N, unsigned kShift, typename T, typename S>
+  bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd,
+                                      T* imm,
+                                      S* shift) const {
+    if (TryEncodeAsShiftedUintNForLane<N, kShift>(zd, imm)) {
+      *shift = kShift;
+      return true;
+    }
+    return false;
+  }
+
+  bool IsZero() const { return raw_bits_ == 0; }
+  bool IsNegative() const { return is_negative_; }
+  bool IsPositiveOrZero() const { return !is_negative_; }
+
+  uint64_t GetMagnitude() const {
+    return is_negative_ ? -raw_bits_ : raw_bits_;
+  }
+
+ private:
+  // Shift the arithmetic value right, with sign extension if is_negative_.
+  void ArithmeticShiftRight(int shift) {
+    VIXL_ASSERT((shift >= 0) && (shift < 64));
+    if (shift == 0) return;
+    if (is_negative_) {
+      raw_bits_ = ExtractSignedBitfield64(63, shift, raw_bits_);
+    } else {
+      raw_bits_ >>= shift;
+    }
+  }
+
+  uint64_t raw_bits_;
+  bool is_negative_;
+};
+
 // This is an abstraction that can represent a register or memory location. The
 // `MacroAssembler` provides helpers to move data between generic operands.
 class GenericOperand {
diff --git a/src/aarch64/registers-aarch64.cc b/src/aarch64/registers-aarch64.cc
new file mode 100644
index 00000000..735f43c7
--- /dev/null
+++ b/src/aarch64/registers-aarch64.cc
@@ -0,0 +1,321 @@
+// Copyright 2019, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright
+//     notice, this list of conditions and the following disclaimer in the
+//     documentation and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may
+//     be used to endorse or promote products derived from this software
+//     without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#include <sstream>
+#include <string>
+
+#include "registers-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+std::string CPURegister::GetArchitecturalName() const {
+  std::ostringstream name;
+  if (IsZRegister()) {
+    name << 'z' << GetCode();
+    if (HasLaneSize()) {
+      name << '.' << GetLaneSizeSymbol();
+    }
+  } else if (IsPRegister()) {
+    name << 'p' << GetCode();
+    if (HasLaneSize()) {
+      name << '.' << GetLaneSizeSymbol();
+    }
+    switch (qualifiers_) {
+      case kNoQualifiers:
+        break;
+      case kMerging:
+        name << "/m";
+        break;
+      case kZeroing:
+        name << "/z";
+        break;
+    }
+  } else {
+    VIXL_UNIMPLEMENTED();
+  }
+  return name.str();
+}
+
+unsigned CPURegister::GetMaxCodeFor(CPURegister::RegisterBank bank) {
+  switch (bank) {
+    case kNoRegisterBank:
+      return 0;
+    case kRRegisterBank:
+      return Register::GetMaxCode();
+    case kVRegisterBank:
+#ifdef VIXL_HAS_CONSTEXPR
+      VIXL_STATIC_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
+#else
+      VIXL_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
+#endif
+      return VRegister::GetMaxCode();
+    case kPRegisterBank:
+      return PRegister::GetMaxCode();
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
+bool CPURegister::IsValidRegister() const {
+  return ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)) &&
+         (bank_ == kRRegisterBank) &&
+         ((size_ == kEncodedWRegSize) || (size_ == kEncodedXRegSize)) &&
+         (qualifiers_ == kNoQualifiers) && (lane_size_ == size_);
+}
+
+bool CPURegister::IsValidVRegister() const {
+  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+  return (code_ < kNumberOfVRegisters) && (bank_ == kVRegisterBank) &&
+         ((size_ >= kEncodedBRegSize) && (size_ <= kEncodedQRegSize)) &&
+         (qualifiers_ == kNoQualifiers) &&
+         (lane_size_ != kEncodedUnknownSize) && (lane_size_ <= size_);
+}
+
+bool CPURegister::IsValidFPRegister() const {
+  return IsValidVRegister() && IsFPRegister();
+}
+
+bool CPURegister::IsValidZRegister() const {
+  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+  // Z registers are valid with or without a lane size, so we don't need to
+  // check lane_size_.
+  return (code_ < kNumberOfZRegisters) && (bank_ == kVRegisterBank) &&
+         (size_ == kEncodedUnknownSize) && (qualifiers_ == kNoQualifiers);
+}
+
+bool CPURegister::IsValidPRegister() const {
+  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+  // P registers are valid with or without a lane size, so we don't need to
+  // check lane_size_.
+  return (code_ < kNumberOfPRegisters) && (bank_ == kPRegisterBank) &&
+         (size_ == kEncodedUnknownSize) &&
+         ((qualifiers_ == kNoQualifiers) || (qualifiers_ == kMerging) ||
+          (qualifiers_ == kZeroing));
+}
+
+bool CPURegister::IsValid() const {
+  return IsValidRegister() || IsValidVRegister() || IsValidZRegister() ||
+         IsValidPRegister();
+}
+
+// Most coercions simply invoke the necessary constructor.
+#define VIXL_CPUREG_COERCION_LIST(U) \
+  U(Register, W, R)                  \
+  U(Register, X, R)                  \
+  U(VRegister, B, V)                 \
+  U(VRegister, H, V)                 \
+  U(VRegister, S, V)                 \
+  U(VRegister, D, V)                 \
+  U(VRegister, Q, V)                 \
+  U(VRegister, V, V)                 \
+  U(ZRegister, Z, V)                 \
+  U(PRegister, P, P)
+#define VIXL_DEFINE_CPUREG_COERCION(RET_TYPE, CTOR_TYPE, BANK) \
+  RET_TYPE CPURegister::CTOR_TYPE() const {                    \
+    VIXL_ASSERT(GetBank() == k##BANK##RegisterBank);           \
+    return CTOR_TYPE##Register(GetCode());                     \
+  }
+VIXL_CPUREG_COERCION_LIST(VIXL_DEFINE_CPUREG_COERCION)
+#undef VIXL_CPUREG_COERCION_LIST
+#undef VIXL_DEFINE_CPUREG_COERCION
+
+// NEON lane-format coercions always return VRegisters.
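+// For example, the V(8, B) entry in the list below expands to the equivalent
+// of:
+//
+//   VRegister VRegister::V8B() const {
+//     VIXL_ASSERT(IsVRegister());
+//     return VRegister(GetCode(), 8 * kBRegSize, 8);  // 64 bits, 8 lanes.
+//   }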
+#define VIXL_CPUREG_NEON_COERCION_LIST(V) \
+  V(8, B)                                 \
+  V(16, B)                                \
+  V(2, H)                                 \
+  V(4, H)                                 \
+  V(8, H)                                 \
+  V(2, S)                                 \
+  V(4, S)                                 \
+  V(1, D)                                 \
+  V(2, D)
+#define VIXL_DEFINE_CPUREG_NEON_COERCION(LANES, LANE_TYPE)             \
+  VRegister VRegister::V##LANES##LANE_TYPE() const {                   \
+    VIXL_ASSERT(IsVRegister());                                        \
+    return VRegister(GetCode(), LANES * k##LANE_TYPE##RegSize, LANES); \
+  }
+VIXL_CPUREG_NEON_COERCION_LIST(VIXL_DEFINE_CPUREG_NEON_COERCION)
+#undef VIXL_CPUREG_NEON_COERCION_LIST
+#undef VIXL_DEFINE_CPUREG_NEON_COERCION
+
+// Semantic type coercion for sdot and udot.
+// TODO: Use the qualifiers_ field to distinguish this from ::S().
+VRegister VRegister::S4B() const {
+  VIXL_ASSERT(IsVRegister());
+  return SRegister(GetCode());
+}
+
+bool AreAliased(const CPURegister& reg1,
+                const CPURegister& reg2,
+                const CPURegister& reg3,
+                const CPURegister& reg4,
+                const CPURegister& reg5,
+                const CPURegister& reg6,
+                const CPURegister& reg7,
+                const CPURegister& reg8) {
+  int number_of_valid_regs = 0;
+  int number_of_valid_vregs = 0;
+  int number_of_valid_pregs = 0;
+
+  RegList unique_regs = 0;
+  RegList unique_vregs = 0;
+  RegList unique_pregs = 0;
+
+  const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};
+
+  for (size_t i = 0; i < ArrayLength(regs); i++) {
+    switch (regs[i].GetBank()) {
+      case CPURegister::kRRegisterBank:
+        number_of_valid_regs++;
+        unique_regs |= regs[i].GetBit();
+        break;
+      case CPURegister::kVRegisterBank:
+        number_of_valid_vregs++;
+        unique_vregs |= regs[i].GetBit();
+        break;
+      case CPURegister::kPRegisterBank:
+        number_of_valid_pregs++;
+        unique_pregs |= regs[i].GetBit();
+        break;
+      case CPURegister::kNoRegisterBank:
+        VIXL_ASSERT(regs[i].IsNone());
+        break;
+    }
+  }
+
+  int number_of_unique_regs = CountSetBits(unique_regs);
+  int number_of_unique_vregs = CountSetBits(unique_vregs);
+  int number_of_unique_pregs = CountSetBits(unique_pregs);
+
+  VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
+  VIXL_ASSERT(number_of_valid_vregs >= number_of_unique_vregs);
+  VIXL_ASSERT(number_of_valid_pregs >= number_of_unique_pregs);
+
+  return (number_of_valid_regs != number_of_unique_regs) ||
+         (number_of_valid_vregs != number_of_unique_vregs) ||
+         (number_of_valid_pregs != number_of_unique_pregs);
+}
+
+bool AreSameSizeAndType(const CPURegister& reg1,
+                        const CPURegister& reg2,
+                        const CPURegister& reg3,
+                        const CPURegister& reg4,
+                        const CPURegister& reg5,
+                        const CPURegister& reg6,
+                        const CPURegister& reg7,
+                        const CPURegister& reg8) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool match = true;
+  match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1);
+  match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1);
+  match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1);
+  match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1);
+  match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1);
+  match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1);
+  match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1);
+  return match;
+}
+
+bool AreEven(const CPURegister& reg1,
+             const CPURegister& reg2,
+             const CPURegister& reg3,
+             const CPURegister& reg4,
+             const CPURegister& reg5,
+             const CPURegister& reg6,
+             const CPURegister& reg7,
+             const CPURegister& reg8) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool even = (reg1.GetCode() % 2) == 0;
+  even &= !reg2.IsValid() || ((reg2.GetCode() % 2) == 0);
+  even &= !reg3.IsValid() || ((reg3.GetCode() % 2) == 0);
+  even &= !reg4.IsValid() || ((reg4.GetCode() % 2) == 0);
+  even &= !reg5.IsValid() || ((reg5.GetCode() % 2) == 0);
+  even &= !reg6.IsValid() || ((reg6.GetCode() % 2) == 0);
+  even &= !reg7.IsValid() || ((reg7.GetCode() % 2) == 0);
+  even &= !reg8.IsValid() || ((reg8.GetCode() % 2) == 0);
+  return even;
+}
+
+bool AreConsecutive(const CPURegister& reg1,
+                    const CPURegister& reg2,
+                    const CPURegister& reg3,
+                    const CPURegister& reg4) {
+  VIXL_ASSERT(reg1.IsValid());
+
+  if (!reg2.IsValid()) {
+    return true;
+  } else if (reg2.GetCode() !=
+             ((reg1.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+    return false;
+  }
+
+  if (!reg3.IsValid()) {
+    return true;
+  } else if (reg3.GetCode() !=
+             ((reg2.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+    return false;
+  }
+
+  if (!reg4.IsValid()) {
+    return true;
+  } else if (reg4.GetCode() !=
+             ((reg3.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+    return false;
+  }
+
+  return true;
+}
+
+bool AreSameFormat(const CPURegister& reg1,
+                   const CPURegister& reg2,
+                   const CPURegister& reg3,
+                   const CPURegister& reg4) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool match = true;
+  match &= !reg2.IsValid() || reg2.IsSameFormat(reg1);
+  match &= !reg3.IsValid() || reg3.IsSameFormat(reg1);
+  match &= !reg4.IsValid() || reg4.IsSameFormat(reg1);
+  return match;
+}
+
+bool AreSameLaneSize(const CPURegister& reg1,
+                     const CPURegister& reg2,
+                     const CPURegister& reg3,
+                     const CPURegister& reg4) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool match = true;
+  match &=
+      !reg2.IsValid() || (reg2.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+  match &=
+      !reg3.IsValid() || (reg3.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+  match &=
+      !reg4.IsValid() || (reg4.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+  return match;
+}
+}
+}  // namespace vixl::aarch64
diff --git a/src/aarch64/registers-aarch64.h b/src/aarch64/registers-aarch64.h
new file mode 100644
index 00000000..911974a8
--- /dev/null
+++ b/src/aarch64/registers-aarch64.h
@@ -0,0 +1,900 @@
+// Copyright 2019, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright
+//     notice, this list of conditions and the following disclaimer in the
+//     documentation and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may
+//     be used to endorse or promote products derived from this software
+//     without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_AARCH64_REGISTERS_AARCH64_H_
+#define VIXL_AARCH64_REGISTERS_AARCH64_H_
+
+#include <string>
+
+#include "instructions-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+// An integer type capable of representing a homogeneous, non-overlapping set
+// of registers as a bitmask of their codes.
+typedef uint64_t RegList;
+static const int kRegListSizeInBits = sizeof(RegList) * 8;
+
+class Register;
+class WRegister;
+class XRegister;
+
+class VRegister;
+class BRegister;
+class HRegister;
+class SRegister;
+class DRegister;
+class QRegister;
+
+class ZRegister;
+
+class PRegister;
+class PRegisterWithLaneSize;
+class PRegisterM;
+class PRegisterZ;
+
+// A container for any single register supported by the processor. Selected
+// qualifications are also supported. Basic registers can be constructed
+// directly as CPURegister objects. Other variants should be constructed as one
+// of the derived classes.
+//
+// CPURegister aims to support any getter that would also be available to more
+// specialised register types. However, using the equivalent functions on the
+// specialised register types can avoid run-time checks, and should therefore
+// be preferred where run-time polymorphism isn't required.
+//
+// Type-specific modifiers are typically implemented only on the derived
+// classes.
+//
+// The encoding is such that CPURegister objects are cheap to pass by value.
+class CPURegister {
+ public:
+  enum RegisterBank : uint8_t {
+    kNoRegisterBank = 0,
+    kRRegisterBank,
+    kVRegisterBank,
+    kPRegisterBank
+  };
+  enum RegisterType {
+    kNoRegister,
+    kRegister,
+    kVRegister,
+    kZRegister,
+    kPRegister
+  };
+
+  static const unsigned kUnknownSize = 0;
+
+  VIXL_CONSTEXPR CPURegister()
+      : code_(0),
+        bank_(kNoRegisterBank),
+        size_(kEncodedUnknownSize),
+        qualifiers_(kNoQualifiers),
+        lane_size_(kEncodedUnknownSize) {}
+
+  CPURegister(int code, int size_in_bits, RegisterType type)
+      : code_(code),
+        bank_(GetBankFor(type)),
+        size_(EncodeSizeInBits(size_in_bits)),
+        qualifiers_(kNoQualifiers),
+        lane_size_(EncodeSizeInBits(size_in_bits)) {
+    VIXL_ASSERT(IsValid());
+  }
+
+  // Basic accessors.
+
+  // TODO: Make this return 'int'.
+  unsigned GetCode() const { return code_; }
+
+  RegisterBank GetBank() const { return bank_; }
+
+  // For scalar registers, the lane size matches the register size, and is
+  // always known.
+  bool HasSize() const { return size_ != kEncodedUnknownSize; }
+  bool HasLaneSize() const { return lane_size_ != kEncodedUnknownSize; }
+
+  RegList GetBit() const {
+    if (IsNone()) return 0;
+    VIXL_ASSERT(code_ < kRegListSizeInBits);
+    return static_cast<RegList>(1) << code_;
+  }
+
+  // Return the architectural name for this register.
+  // TODO: This is temporary. Ultimately, we should move the
+  // Simulator::*RegNameForCode helpers out of the simulator, and provide an
+  // independent way to obtain the name of a register.
+  std::string GetArchitecturalName() const;
+
+  // Return the highest valid register code for this type, to allow generic
+  // loops to be written. This excludes kSPRegInternalCode, since it is not
+  // contiguous, and sp usually requires special handling anyway.
+  unsigned GetMaxCode() const { return GetMaxCodeFor(GetBank()); }
+
+  // Registers without a known size report kUnknownSize.
+  int GetSizeInBits() const { return DecodeSizeInBits(size_); }
+  int GetSizeInBytes() const { return DecodeSizeInBytes(size_); }
+  // TODO: Make these return 'int'.
+ unsigned GetLaneSizeInBits() const { return DecodeSizeInBits(lane_size_); } + unsigned GetLaneSizeInBytes() const { return DecodeSizeInBytes(lane_size_); } + unsigned GetLaneSizeInBytesLog2() const { + VIXL_ASSERT(HasLaneSize()); + return DecodeSizeInBytesLog2(lane_size_); + } + + int GetLanes() const { + if (HasSize() && HasLaneSize()) { + // Take advantage of the size encoding to calculate this efficiently. + VIXL_STATIC_ASSERT(kEncodedHRegSize == (kEncodedBRegSize + 1)); + VIXL_STATIC_ASSERT(kEncodedSRegSize == (kEncodedHRegSize + 1)); + VIXL_STATIC_ASSERT(kEncodedDRegSize == (kEncodedSRegSize + 1)); + VIXL_STATIC_ASSERT(kEncodedQRegSize == (kEncodedDRegSize + 1)); + int log2_delta = static_cast<int>(size_) - static_cast<int>(lane_size_); + VIXL_ASSERT(log2_delta >= 0); + return 1 << log2_delta; + } + return kUnknownSize; + } + + bool Is8Bits() const { return size_ == kEncodedBRegSize; } + bool Is16Bits() const { return size_ == kEncodedHRegSize; } + bool Is32Bits() const { return size_ == kEncodedSRegSize; } + bool Is64Bits() const { return size_ == kEncodedDRegSize; } + bool Is128Bits() const { return size_ == kEncodedQRegSize; } + + bool IsLaneSizeB() const { return lane_size_ == kEncodedBRegSize; } + bool IsLaneSizeH() const { return lane_size_ == kEncodedHRegSize; } + bool IsLaneSizeS() const { return lane_size_ == kEncodedSRegSize; } + bool IsLaneSizeD() const { return lane_size_ == kEncodedDRegSize; } + bool IsLaneSizeQ() const { return lane_size_ == kEncodedQRegSize; } + + // If Is<Foo>Register(), then it is valid to convert the CPURegister to some + // <Foo>Register<Bar> type. + // + // If... ... then it is safe to construct ... + // r.IsRegister() -> Register(r) + // r.IsVRegister() -> VRegister(r) + // r.IsZRegister() -> ZRegister(r) + // r.IsPRegister() -> PRegister(r) + // + // r.IsPRegister() && HasLaneSize() -> PRegisterWithLaneSize(r) + // r.IsPRegister() && IsMerging() -> PRegisterM(r) + // r.IsPRegister() && IsZeroing() -> PRegisterZ(r) + bool IsRegister() const { return GetType() == kRegister; } + bool IsVRegister() const { return GetType() == kVRegister; } + bool IsZRegister() const { return GetType() == kZRegister; } + bool IsPRegister() const { return GetType() == kPRegister; } + + bool IsNone() const { return GetType() == kNoRegister; } + + // `GetType() == kNoRegister` implies IsNone(), and vice-versa. + // `GetType() == k<Foo>Register` implies Is<Foo>Register(), and vice-versa. + RegisterType GetType() const { + switch (bank_) { + case kNoRegisterBank: + return kNoRegister; + case kRRegisterBank: + return kRegister; + case kVRegisterBank: + return HasSize() ? kVRegister : kZRegister; + case kPRegisterBank: + return kPRegister; + } + VIXL_UNREACHABLE(); + return kNoRegister; + } + + // IsFPRegister() is true for scalar FP types (and therefore implies + // IsVRegister()). There is no corresponding FPRegister type. + bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); } + + // TODO: These are stricter forms of the helpers above. We should make the + // basic helpers strict, and remove these. 
+  bool IsValidRegister() const;
+  bool IsValidVRegister() const;
+  bool IsValidFPRegister() const;
+  bool IsValidZRegister() const;
+  bool IsValidPRegister() const;
+
+  bool IsValid() const;
+  bool IsValidOrNone() const { return IsNone() || IsValid(); }
+
+  bool IsVector() const { return HasLaneSize() && (size_ != lane_size_); }
+  bool IsScalar() const { return HasLaneSize() && (size_ == lane_size_); }
+
+  bool IsSameType(const CPURegister& other) const {
+    return GetType() == other.GetType();
+  }
+
+  bool IsSameBank(const CPURegister& other) const {
+    return GetBank() == other.GetBank();
+  }
+
+  // Two registers with unknown size are considered to have the same size if
+  // they also have the same type. For example, all Z registers have the same
+  // size, even though we don't know what that is.
+  bool IsSameSizeAndType(const CPURegister& other) const {
+    return IsSameType(other) && (size_ == other.size_);
+  }
+
+  bool IsSameFormat(const CPURegister& other) const {
+    return IsSameSizeAndType(other) && (lane_size_ == other.lane_size_);
+  }
+
+  // Note that NoReg aliases itself, so that 'Is' implies 'Aliases'.
+  bool Aliases(const CPURegister& other) const {
+    return IsSameBank(other) && (code_ == other.code_);
+  }
+
+  bool Is(const CPURegister& other) const {
+    if (IsRegister() || IsVRegister()) {
+      // For core (W, X) and FP/NEON registers, we only consider the code, size
+      // and type. This is legacy behaviour.
+      // TODO: We should probably check every field for all registers.
+      return Aliases(other) && (size_ == other.size_);
+    } else {
+      // For Z and P registers, we require all fields to match exactly.
+      VIXL_ASSERT(IsNone() || IsZRegister() || IsPRegister());
+      return (code_ == other.code_) && (bank_ == other.bank_) &&
+             (size_ == other.size_) && (qualifiers_ == other.qualifiers_) &&
+             (lane_size_ == other.lane_size_);
+    }
+  }
+
+  // Conversions to specific register types. The result is a register that
+  // aliases the original CPURegister. That is, the original register bank
+  // (`GetBank()`) is checked and the code (`GetCode()`) preserved, but all
+  // other properties are ignored.
+  //
+  // Typical usage:
+  //
+  //     if (reg.GetBank() == kVRegisterBank) {
+  //       DRegister d = reg.D();
+  //       ...
+  //     }
+  //
+  // These could all return types with compile-time guarantees (like XRegister),
+  // but this breaks backwards-compatibility quite severely, particularly with
+  // code like `cond ? reg.W() : reg.X()`, which would have indeterminate type.
+
+  // Core registers, like "w0".
+  Register W() const;
+  Register X() const;
+  // FP/NEON registers, like "b0".
+  VRegister B() const;
+  VRegister H() const;
+  VRegister S() const;
+  VRegister D() const;
+  VRegister Q() const;
+  VRegister V() const;
+  // SVE registers, like "z0".
+  ZRegister Z() const;
+  PRegister P() const;
+
+  // Utilities for kRegister types.
+
+  bool IsZero() const { return IsRegister() && (code_ == kZeroRegCode); }
+  bool IsSP() const { return IsRegister() && (code_ == kSPRegInternalCode); }
+  bool IsW() const { return IsRegister() && Is32Bits(); }
+  bool IsX() const { return IsRegister() && Is64Bits(); }
+
+  // Utilities for FP/NEON kVRegister types.
+
+  // These helpers ensure that the size and type of the register are as
+  // described. They do not consider the number of lanes that make up a vector.
+  // So, for example, Is8B() implies IsD(), and Is1D() implies IsD(), but IsD()
+  // does not imply Is1D() or Is8B().
+  // Check the number of lanes, i.e. the format of the vector, using methods
+  // such as Is8B(), Is1D(), etc.
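+  //
+  // For example, for register code 0 in the V bank:
+  //  - q0 (the 1Q scalar form of v0) satisfies IsQ() and Is1Q(), but not
+  //    Is16B().
+  //  - v0.V16B() satisfies IsQ() and Is16B(), but not Is1Q().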
+  bool IsB() const { return IsVRegister() && Is8Bits(); }
+  bool IsH() const { return IsVRegister() && Is16Bits(); }
+  bool IsS() const { return IsVRegister() && Is32Bits(); }
+  bool IsD() const { return IsVRegister() && Is64Bits(); }
+  bool IsQ() const { return IsVRegister() && Is128Bits(); }
+
+  // As above, but also check that the register has exactly one lane. For
+  // example, reg.Is1D() implies DRegister(reg).IsValid(), but reg.IsD() does
+  // not.
+  bool Is1B() const { return IsB() && IsScalar(); }
+  bool Is1H() const { return IsH() && IsScalar(); }
+  bool Is1S() const { return IsS() && IsScalar(); }
+  bool Is1D() const { return IsD() && IsScalar(); }
+  bool Is1Q() const { return IsQ() && IsScalar(); }
+
+  // Check the specific NEON format.
+  bool Is8B() const { return IsD() && IsLaneSizeB(); }
+  bool Is16B() const { return IsQ() && IsLaneSizeB(); }
+  bool Is2H() const { return IsS() && IsLaneSizeH(); }
+  bool Is4H() const { return IsD() && IsLaneSizeH(); }
+  bool Is8H() const { return IsQ() && IsLaneSizeH(); }
+  bool Is2S() const { return IsD() && IsLaneSizeS(); }
+  bool Is4S() const { return IsQ() && IsLaneSizeS(); }
+  bool Is2D() const { return IsQ() && IsLaneSizeD(); }
+
+  // A semantic alias for sdot and udot (indexed and by element) instructions.
+  // The current CPURegister implementation cannot tell this from Is1S(),
+  // but it might do later.
+  // TODO: Do this with the qualifiers_ field.
+  bool Is1S4B() const { return Is1S(); }
+
+  // Utilities for SVE registers.
+
+  bool IsUnqualified() const { return qualifiers_ == kNoQualifiers; }
+  bool IsMerging() const { return IsPRegister() && (qualifiers_ == kMerging); }
+  bool IsZeroing() const { return IsPRegister() && (qualifiers_ == kZeroing); }
+
+  // SVE types have unknown sizes, but within known bounds.
+
+  int GetMaxSizeInBytes() const {
+    switch (GetType()) {
+      case kZRegister:
+        return kZRegMaxSizeInBytes;
+      case kPRegister:
+        return kPRegMaxSizeInBytes;
+      default:
+        VIXL_ASSERT(HasSize());
+        return GetSizeInBytes();
+    }
+  }
+
+  int GetMinSizeInBytes() const {
+    switch (GetType()) {
+      case kZRegister:
+        return kZRegMinSizeInBytes;
+      case kPRegister:
+        return kPRegMinSizeInBytes;
+      default:
+        VIXL_ASSERT(HasSize());
+        return GetSizeInBytes();
+    }
+  }
+
+  int GetMaxSizeInBits() const { return GetMaxSizeInBytes() * kBitsPerByte; }
+  int GetMinSizeInBits() const { return GetMinSizeInBytes() * kBitsPerByte; }
+
+  static RegisterBank GetBankFor(RegisterType type) {
+    switch (type) {
+      case kNoRegister:
+        return kNoRegisterBank;
+      case kRegister:
+        return kRRegisterBank;
+      case kVRegister:
+      case kZRegister:
+        return kVRegisterBank;
+      case kPRegister:
+        return kPRegisterBank;
+    }
+    VIXL_UNREACHABLE();
+    return kNoRegisterBank;
+  }
+
+  static unsigned GetMaxCodeFor(CPURegister::RegisterType type) {
+    return GetMaxCodeFor(GetBankFor(type));
+  }
+
+ protected:
+  enum EncodedSize : uint8_t {
+    // Ensure that kUnknownSize (and therefore kNoRegister) is encoded as zero.
+    kEncodedUnknownSize = 0,
+
+    // The implementation assumes that the remaining sizes are encoded as
+    // `log2(size) + c`, so the following names must remain in sequence.
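+    // For instance, kEncodedBRegSize (1) decodes to a 1-byte (8-bit) register
+    // and kEncodedQRegSize (5) to a 16-byte (128-bit) one; see
+    // DecodeSizeInBytesLog2() below.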
+ kEncodedBRegSize, + kEncodedHRegSize, + kEncodedSRegSize, + kEncodedDRegSize, + kEncodedQRegSize, + + kEncodedWRegSize = kEncodedSRegSize, + kEncodedXRegSize = kEncodedDRegSize + }; + VIXL_STATIC_ASSERT(kSRegSize == kWRegSize); + VIXL_STATIC_ASSERT(kDRegSize == kXRegSize); + + char GetLaneSizeSymbol() const { + switch (lane_size_) { + case kEncodedBRegSize: + return 'B'; + case kEncodedHRegSize: + return 'H'; + case kEncodedSRegSize: + return 'S'; + case kEncodedDRegSize: + return 'D'; + case kEncodedQRegSize: + return 'Q'; + case kEncodedUnknownSize: + break; + } + VIXL_UNREACHABLE(); + return '?'; + } + + static EncodedSize EncodeSizeInBits(int size_in_bits) { + switch (size_in_bits) { + case kUnknownSize: + return kEncodedUnknownSize; + case kBRegSize: + return kEncodedBRegSize; + case kHRegSize: + return kEncodedHRegSize; + case kSRegSize: + return kEncodedSRegSize; + case kDRegSize: + return kEncodedDRegSize; + case kQRegSize: + return kEncodedQRegSize; + } + VIXL_UNREACHABLE(); + return kEncodedUnknownSize; + } + + static int DecodeSizeInBytesLog2(EncodedSize encoded_size) { + switch (encoded_size) { + case kEncodedUnknownSize: + // Log2 of B-sized lane in bytes is 0, so we can't just return 0 here. + VIXL_UNREACHABLE(); + return -1; + case kEncodedBRegSize: + return kBRegSizeInBytesLog2; + case kEncodedHRegSize: + return kHRegSizeInBytesLog2; + case kEncodedSRegSize: + return kSRegSizeInBytesLog2; + case kEncodedDRegSize: + return kDRegSizeInBytesLog2; + case kEncodedQRegSize: + return kQRegSizeInBytesLog2; + } + VIXL_UNREACHABLE(); + return kUnknownSize; + } + + static int DecodeSizeInBytes(EncodedSize encoded_size) { + if (encoded_size == kEncodedUnknownSize) { + return kUnknownSize; + } + return 1 << DecodeSizeInBytesLog2(encoded_size); + } + + static int DecodeSizeInBits(EncodedSize encoded_size) { + VIXL_STATIC_ASSERT(kUnknownSize == 0); + return DecodeSizeInBytes(encoded_size) * kBitsPerByte; + } + + static unsigned GetMaxCodeFor(CPURegister::RegisterBank bank); + + enum Qualifiers : uint8_t { + kNoQualifiers = 0, + // Used by P registers. + kMerging, + kZeroing + }; + + // An unchecked constructor, for use by derived classes. + CPURegister(int code, + EncodedSize size, + RegisterBank bank, + EncodedSize lane_size, + Qualifiers qualifiers = kNoQualifiers) + : code_(code), + bank_(bank), + size_(size), + qualifiers_(qualifiers), + lane_size_(lane_size) {} + + // TODO: Check that access to these fields is reasonably efficient. + uint8_t code_; + RegisterBank bank_; + EncodedSize size_; + Qualifiers qualifiers_; + EncodedSize lane_size_; +}; +// Ensure that CPURegisters can fit in a single (64-bit) register. This is a +// proxy for being "cheap to pass by value", which is hard to check directly. +VIXL_STATIC_ASSERT(sizeof(CPURegister) <= sizeof(uint64_t)); + +// TODO: Add constexpr constructors. +#define VIXL_DECLARE_REGISTER_COMMON(NAME, REGISTER_TYPE, PARENT_TYPE) \ + VIXL_CONSTEXPR NAME() : PARENT_TYPE() {} \ + \ + explicit NAME(CPURegister other) : PARENT_TYPE(other) { \ + VIXL_ASSERT(IsValid()); \ + } \ + \ + VIXL_CONSTEXPR static unsigned GetMaxCode() { \ + return kNumberOf##REGISTER_TYPE##s - 1; \ + } + +// Any W or X register, including the zero register and the stack pointer. 
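+//
+// For example (the named constants defined later in this header are the usual
+// way to write these):
+//
+//   Register(0, kWRegSize);                   // w0
+//   Register(30, kXRegSize);                  // x30 (lr)
+//   Register(kSPRegInternalCode, kXRegSize);  // sp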
+class Register : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(Register, Register, CPURegister) + + Register(int code, int size_in_bits) + : CPURegister(code, size_in_bits, kRegister) { + VIXL_ASSERT(IsValidRegister()); + } + + bool IsValid() const { return IsValidRegister(); } +}; + +// Any FP or NEON V register, including vector (V.<T>) and scalar forms +// (B, H, S, D, Q). +class VRegister : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(VRegister, VRegister, CPURegister) + + // For historical reasons, VRegister(0) returns v0.1Q (or equivalently, q0). + explicit VRegister(int code, int size_in_bits = kQRegSize, int lanes = 1) + : CPURegister(code, + EncodeSizeInBits(size_in_bits), + kVRegisterBank, + EncodeLaneSizeInBits(size_in_bits, lanes)) { + VIXL_ASSERT(IsValidVRegister()); + } + + VRegister(int code, VectorFormat format) + : CPURegister(code, + EncodeSizeInBits(RegisterSizeInBitsFromFormat(format)), + kVRegisterBank, + EncodeSizeInBits(LaneSizeInBitsFromFormat(format)), + kNoQualifiers) { + VIXL_ASSERT(IsValid()); + } + + VRegister V8B() const; + VRegister V16B() const; + VRegister V2H() const; + VRegister V4H() const; + VRegister V8H() const; + VRegister V2S() const; + VRegister V4S() const; + VRegister V1D() const; + VRegister V2D() const; + VRegister S4B() const; + + bool IsValid() const { return IsValidVRegister(); } + + protected: + static EncodedSize EncodeLaneSizeInBits(int size_in_bits, int lanes) { + VIXL_ASSERT(lanes >= 1); + VIXL_ASSERT((size_in_bits % lanes) == 0); + return EncodeSizeInBits(size_in_bits / lanes); + } +}; + +// Any SVE Z register, with or without a lane size specifier. +class ZRegister : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(ZRegister, ZRegister, CPURegister) + + explicit ZRegister(int code, int lane_size_in_bits = kUnknownSize) + : CPURegister(code, + kEncodedUnknownSize, + kVRegisterBank, + EncodeSizeInBits(lane_size_in_bits)) { + VIXL_ASSERT(IsValid()); + } + + ZRegister(int code, VectorFormat format) + : CPURegister(code, + kEncodedUnknownSize, + kVRegisterBank, + EncodeSizeInBits(LaneSizeInBitsFromFormat(format)), + kNoQualifiers) { + VIXL_ASSERT(IsValid()); + } + + // Return a Z register with a known lane size (like "z0.B"). + ZRegister VnB() const { return ZRegister(GetCode(), kBRegSize); } + ZRegister VnH() const { return ZRegister(GetCode(), kHRegSize); } + ZRegister VnS() const { return ZRegister(GetCode(), kSRegSize); } + ZRegister VnD() const { return ZRegister(GetCode(), kDRegSize); } + ZRegister VnQ() const { return ZRegister(GetCode(), kQRegSize); } + + template <typename T> + ZRegister WithLaneSize(T format) const { + return ZRegister(GetCode(), format); + } + + ZRegister WithSameLaneSizeAs(const CPURegister& other) const { + VIXL_ASSERT(other.HasLaneSize()); + return this->WithLaneSize(other.GetLaneSizeInBits()); + } + + bool IsValid() const { return IsValidZRegister(); } +}; + +// Any SVE P register, with or without a qualifier or lane size specifier. +class PRegister : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegister, PRegister, CPURegister) + + explicit PRegister(int code) : CPURegister(code, kUnknownSize, kPRegister) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && !HasLaneSize() && IsUnqualified(); + } + + // Return a P register with a known lane size (like "p0.B"). 
+ PRegisterWithLaneSize VnB() const; + PRegisterWithLaneSize VnH() const; + PRegisterWithLaneSize VnS() const; + PRegisterWithLaneSize VnD() const; + + template <typename T> + PRegisterWithLaneSize WithLaneSize(T format) const; + + PRegisterWithLaneSize WithSameLaneSizeAs(const CPURegister& other) const; + + // SVE predicates are specified (in normal assembly) with a "/z" (zeroing) or + // "/m" (merging) suffix. These methods are VIXL's equivalents. + PRegisterZ Zeroing() const; + PRegisterM Merging() const; + + protected: + // Unchecked constructors, for use by derived classes. + PRegister(int code, EncodedSize encoded_lane_size) + : CPURegister(code, + kEncodedUnknownSize, + kPRegisterBank, + encoded_lane_size, + kNoQualifiers) {} + + PRegister(int code, Qualifiers qualifiers) + : CPURegister(code, + kEncodedUnknownSize, + kPRegisterBank, + kEncodedUnknownSize, + qualifiers) {} +}; + +// Any SVE P register with a known lane size (like "p0.B"). +class PRegisterWithLaneSize : public PRegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegisterWithLaneSize, PRegister, PRegister) + + PRegisterWithLaneSize(int code, int lane_size_in_bits) + : PRegister(code, EncodeSizeInBits(lane_size_in_bits)) { + VIXL_ASSERT(IsValid()); + } + + PRegisterWithLaneSize(int code, VectorFormat format) + : PRegister(code, EncodeSizeInBits(LaneSizeInBitsFromFormat(format))) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && HasLaneSize() && IsUnqualified(); + } + + // Overload lane size accessors so we can assert `HasLaneSize()`. This allows + // tools such as clang-tidy to prove that the result of GetLaneSize* is + // non-zero. + + // TODO: Make these return 'int'. + unsigned GetLaneSizeInBits() const { + VIXL_ASSERT(HasLaneSize()); + return PRegister::GetLaneSizeInBits(); + } + + unsigned GetLaneSizeInBytes() const { + VIXL_ASSERT(HasLaneSize()); + return PRegister::GetLaneSizeInBytes(); + } +}; + +// Any SVE P register with the zeroing qualifier (like "p0/z"). +class PRegisterZ : public PRegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegisterZ, PRegister, PRegister) + + explicit PRegisterZ(int code) : PRegister(code, kZeroing) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && !HasLaneSize() && IsZeroing(); + } +}; + +// Any SVE P register with the merging qualifier (like "p0/m"). 
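+// A PRegisterM is normally obtained via PRegister::Merging(). For example (a
+// sketch, assuming the usual predicated MacroAssembler helpers declared
+// elsewhere in VIXL):
+//
+//   __ Add(z0.VnS(), p1.Merging(), z0.VnS(), z2.VnS());  // add z0.s, p1/m, ...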
+class PRegisterM : public PRegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegisterM, PRegister, PRegister) + + explicit PRegisterM(int code) : PRegister(code, kMerging) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && !HasLaneSize() && IsMerging(); + } +}; + +inline PRegisterWithLaneSize PRegister::VnB() const { + return PRegisterWithLaneSize(GetCode(), kBRegSize); +} +inline PRegisterWithLaneSize PRegister::VnH() const { + return PRegisterWithLaneSize(GetCode(), kHRegSize); +} +inline PRegisterWithLaneSize PRegister::VnS() const { + return PRegisterWithLaneSize(GetCode(), kSRegSize); +} +inline PRegisterWithLaneSize PRegister::VnD() const { + return PRegisterWithLaneSize(GetCode(), kDRegSize); +} + +template <typename T> +inline PRegisterWithLaneSize PRegister::WithLaneSize(T format) const { + return PRegisterWithLaneSize(GetCode(), format); +} + +inline PRegisterWithLaneSize PRegister::WithSameLaneSizeAs( + const CPURegister& other) const { + VIXL_ASSERT(other.HasLaneSize()); + return this->WithLaneSize(other.GetLaneSizeInBits()); +} + +inline PRegisterZ PRegister::Zeroing() const { return PRegisterZ(GetCode()); } +inline PRegisterM PRegister::Merging() const { return PRegisterM(GetCode()); } + +#define VIXL_REGISTER_WITH_SIZE_LIST(V) \ + V(WRegister, kWRegSize, Register) \ + V(XRegister, kXRegSize, Register) \ + V(QRegister, kQRegSize, VRegister) \ + V(DRegister, kDRegSize, VRegister) \ + V(SRegister, kSRegSize, VRegister) \ + V(HRegister, kHRegSize, VRegister) \ + V(BRegister, kBRegSize, VRegister) + +#define VIXL_DEFINE_REGISTER_WITH_SIZE(NAME, SIZE, PARENT) \ + class NAME : public PARENT { \ + public: \ + VIXL_CONSTEXPR NAME() : PARENT() {} \ + explicit NAME(int code) : PARENT(code, SIZE) {} \ + \ + explicit NAME(PARENT other) : PARENT(other) { \ + VIXL_ASSERT(GetSizeInBits() == SIZE); \ + } \ + \ + PARENT As##PARENT() const { return *this; } \ + \ + VIXL_CONSTEXPR int GetSizeInBits() const { return SIZE; } \ + \ + bool IsValid() const { \ + return PARENT::IsValid() && (PARENT::GetSizeInBits() == SIZE); \ + } \ + }; + +VIXL_REGISTER_WITH_SIZE_LIST(VIXL_DEFINE_REGISTER_WITH_SIZE) + +// No*Reg is used to provide default values for unused arguments, error cases +// and so on. Note that these (and the default constructors) all compare equal +// (using the Is() method). +const Register NoReg; +const VRegister NoVReg; +const CPURegister NoCPUReg; +const ZRegister NoZReg; + +// TODO: Ideally, these would use specialised register types (like XRegister and +// so on). However, doing so throws up template overloading problems elsewhere. +#define VIXL_DEFINE_REGISTERS(N) \ + const Register w##N = WRegister(N); \ + const Register x##N = XRegister(N); \ + const VRegister b##N = BRegister(N); \ + const VRegister h##N = HRegister(N); \ + const VRegister s##N = SRegister(N); \ + const VRegister d##N = DRegister(N); \ + const VRegister q##N = QRegister(N); \ + const VRegister v##N(N); \ + const ZRegister z##N(N); +AARCH64_REGISTER_CODE_LIST(VIXL_DEFINE_REGISTERS) +#undef VIXL_DEFINE_REGISTERS + +#define VIXL_DEFINE_P_REGISTERS(N) const PRegister p##N(N); +AARCH64_P_REGISTER_CODE_LIST(VIXL_DEFINE_P_REGISTERS) +#undef VIXL_DEFINE_P_REGISTERS + +// VIXL represents 'sp' with a unique code, to tell it apart from 'xzr'. +const Register wsp = WRegister(kSPRegInternalCode); +const Register sp = XRegister(kSPRegInternalCode); + +// Standard aliases. 
+const Register ip0 = x16; +const Register ip1 = x17; +const Register lr = x30; +const Register xzr = x31; +const Register wzr = w31; + +// AreAliased returns true if any of the named registers overlap. Arguments +// set to NoReg are ignored. The system stack pointer may be specified. +bool AreAliased(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoReg, + const CPURegister& reg4 = NoReg, + const CPURegister& reg5 = NoReg, + const CPURegister& reg6 = NoReg, + const CPURegister& reg7 = NoReg, + const CPURegister& reg8 = NoReg); + +// AreSameSizeAndType returns true if all of the specified registers have the +// same size, and are of the same type. The system stack pointer may be +// specified. Arguments set to NoReg are ignored, as are any subsequent +// arguments. At least one argument (reg1) must be valid (not NoCPUReg). +bool AreSameSizeAndType(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg, + const CPURegister& reg5 = NoCPUReg, + const CPURegister& reg6 = NoCPUReg, + const CPURegister& reg7 = NoCPUReg, + const CPURegister& reg8 = NoCPUReg); + +// AreEven returns true if all of the specified registers have even register +// indices. Arguments set to NoReg are ignored, as are any subsequent +// arguments. At least one argument (reg1) must be valid (not NoCPUReg). +bool AreEven(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoReg, + const CPURegister& reg4 = NoReg, + const CPURegister& reg5 = NoReg, + const CPURegister& reg6 = NoReg, + const CPURegister& reg7 = NoReg, + const CPURegister& reg8 = NoReg); + +// AreConsecutive returns true if all of the specified registers are +// consecutive in the register file. Arguments set to NoReg are ignored, as are +// any subsequent arguments. At least one argument (reg1) must be valid +// (not NoCPUReg). +bool AreConsecutive(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); + +// AreSameFormat returns true if all of the specified registers have the same +// vector format. Arguments set to NoReg are ignored, as are any subsequent +// arguments. At least one argument (reg1) must be valid (not NoVReg). +bool AreSameFormat(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); + +// AreSameLaneSize returns true if all of the specified registers have the same +// element lane size, B, H, S or D. It doesn't compare the type of registers. +// Arguments set to NoReg are ignored, as are any subsequent arguments. +// At least one argument (reg1) must be valid (not NoVReg). +// TODO: Remove this, and replace its uses with AreSameFormat. 
+bool AreSameLaneSize(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); +} +} // namespace vixl::aarch64 + +#endif // VIXL_AARCH64_REGISTERS_AARCH64_H_ diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc index 855a2971..6d6d1677 100644 --- a/src/aarch64/simulator-aarch64.cc +++ b/src/aarch64/simulator-aarch64.cc @@ -26,6 +26,9 @@ #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 +#include <errno.h> +#include <unistd.h> + #include <cmath> #include <cstring> #include <limits> @@ -65,12 +68,13 @@ SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) { Simulator::Simulator(Decoder* decoder, FILE* stream) - : cpu_features_auditor_(decoder, CPUFeatures::All()) { + : movprfx_(NULL), cpu_features_auditor_(decoder, CPUFeatures::All()) { // Ensure that shift operations act as the simulator expects. VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1); VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7fffffff); - instruction_stats_ = false; + // Set up a dummy pipe for CanReadMemory. + VIXL_CHECK(pipe(dummy_pipe_fd_) == 0); // Set up the decoder. decoder_ = decoder; @@ -91,6 +95,10 @@ Simulator::Simulator(Decoder* decoder, FILE* stream) SetColouredTrace(false); trace_parameters_ = LOG_NONE; + // We have to configure the SVE vector register length before calling + // ResetState(). + SetVectorLengthInBits(kZRegMinSize); + ResetState(); // Allocate and set up the simulator stack. @@ -105,8 +113,6 @@ Simulator::Simulator(Decoder* decoder, FILE* stream) tos = AlignDown(tos, 16); WriteSp(tos); - instrumentation_ = NULL; - // Print a warning about exclusive-access instructions, but only the first // time they are encountered. This warning can be silenced using // SilenceExclusiveAccessWarning(). @@ -116,52 +122,111 @@ Simulator::Simulator(Decoder* decoder, FILE* stream) // Initialize the common state of RNDR and RNDRRS. uint16_t seed[3] = {11, 22, 33}; - VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rndr_state_)); - memcpy(rndr_state_, seed, sizeof(rndr_state_)); -} + VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rand_state_)); + memcpy(rand_state_, seed, sizeof(rand_state_)); + // Initialize all bits of pseudo predicate register to true. + LogicPRegister ones(pregister_all_true_); + ones.SetAllBits(); +} -void Simulator::ResetState() { +void Simulator::ResetSystemRegisters() { // Reset the system registers. nzcv_ = SimSystemRegister::DefaultValueFor(NZCV); fpcr_ = SimSystemRegister::DefaultValueFor(FPCR); + ResetFFR(); +} - // Reset registers to 0. - pc_ = NULL; - pc_modified_ = false; +void Simulator::ResetRegisters() { for (unsigned i = 0; i < kNumberOfRegisters; i++) { WriteXRegister(i, 0xbadbeef); } - // Set FP registers to a value that is a NaN in both 32-bit and 64-bit FP. - uint64_t nan_bits[] = { - UINT64_C(0x7ff00cab7f8ba9e1), UINT64_C(0x7ff0dead7f8beef1), - }; - VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits[0] & kDRegMask))); - VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits[0] & kSRegMask))); + // Returning to address 0 exits the Simulator. + WriteLr(kEndOfSimAddress); +} - qreg_t q_bits; - VIXL_ASSERT(sizeof(q_bits) == sizeof(nan_bits)); - memcpy(&q_bits, nan_bits, sizeof(nan_bits)); +void Simulator::ResetVRegisters() { + // Set SVE/FP registers to a value that is a NaN in both 32-bit and 64-bit FP. 
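+  // The vector length is always a whole number of D-sized (64-bit) chunks, so
+  // initialise each register one D-sized lane at a time.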
+ VIXL_ASSERT((GetVectorLengthInBytes() % kDRegSizeInBytes) == 0); + int lane_count = GetVectorLengthInBytes() / kDRegSizeInBytes; + for (unsigned i = 0; i < kNumberOfZRegisters; i++) { + VIXL_ASSERT(vregisters_[i].GetSizeInBytes() == GetVectorLengthInBytes()); + vregisters_[i].NotifyAccessAsZ(); + for (int lane = 0; lane < lane_count; lane++) { + // Encode the register number and (D-sized) lane into each NaN, to + // make them easier to trace. + uint64_t nan_bits = 0x7ff0f0007f80f000 | (0x0000000100000000 * i) | + (0x0000000000000001 * lane); + VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask))); + VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask))); + vregisters_[i].Insert(lane, nan_bits); + } + } +} - for (unsigned i = 0; i < kNumberOfVRegisters; i++) { - WriteQRegister(i, q_bits); +void Simulator::ResetPRegisters() { + VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0); + int lane_count = GetPredicateLengthInBytes() / kHRegSizeInBytes; + // Ensure the register configuration fits in this bit encoding. + VIXL_STATIC_ASSERT(kNumberOfPRegisters <= UINT8_MAX); + VIXL_ASSERT(lane_count <= UINT8_MAX); + for (unsigned i = 0; i < kNumberOfPRegisters; i++) { + VIXL_ASSERT(pregisters_[i].GetSizeInBytes() == GetPredicateLengthInBytes()); + for (int lane = 0; lane < lane_count; lane++) { + // Encode the register number and (H-sized) lane into each lane slot. + uint16_t bits = (0x0100 * lane) | i; + pregisters_[i].Insert(lane, bits); + } } - // Returning to address 0 exits the Simulator. - WriteLr(kEndOfSimAddress); +} + +void Simulator::ResetFFR() { + VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0); + int default_active_lanes = GetPredicateLengthInBytes() / kHRegSizeInBytes; + ffr_register_.Write(static_cast<uint16_t>(GetUintMask(default_active_lanes))); +} + +void Simulator::ResetState() { + ResetSystemRegisters(); + ResetRegisters(); + ResetVRegisters(); + ResetPRegisters(); + pc_ = NULL; + pc_modified_ = false; + + // BTI state. btype_ = DefaultBType; next_btype_ = DefaultBType; } +void Simulator::SetVectorLengthInBits(unsigned vector_length) { + VIXL_ASSERT((vector_length >= kZRegMinSize) && + (vector_length <= kZRegMaxSize)); + VIXL_ASSERT((vector_length % kZRegMinSize) == 0); + vector_length_ = vector_length; + + for (unsigned i = 0; i < kNumberOfZRegisters; i++) { + vregisters_[i].SetSizeInBytes(GetVectorLengthInBytes()); + } + for (unsigned i = 0; i < kNumberOfPRegisters; i++) { + pregisters_[i].SetSizeInBytes(GetPredicateLengthInBytes()); + } + + ffr_register_.SetSizeInBytes(GetPredicateLengthInBytes()); + + ResetVRegisters(); + ResetPRegisters(); + ResetFFR(); +} Simulator::~Simulator() { delete[] stack_; // The decoder may outlive the simulator. 
decoder_->RemoveVisitor(print_disasm_); delete print_disasm_; - - decoder_->RemoveVisitor(instrumentation_); - delete instrumentation_; + close(dummy_pipe_fd_[0]); + close(dummy_pipe_fd_[1]); } @@ -182,6 +247,7 @@ void Simulator::RunFrom(const Instruction* first) { } +// clang-format off const char* Simulator::xreg_names[] = {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", @@ -196,6 +262,13 @@ const char* Simulator::wreg_names[] = {"w0", "w1", "w2", "w3", "w4", "w5", "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr", "wsp"}; +const char* Simulator::breg_names[] = {"b0", "b1", "b2", "b3", "b4", "b5", + "b6", "b7", "b8", "b9", "b10", "b11", + "b12", "b13", "b14", "b15", "b16", "b17", + "b18", "b19", "b20", "b21", "b22", "b23", + "b24", "b25", "b26", "b27", "b28", "b29", + "b30", "b31"}; + const char* Simulator::hreg_names[] = {"h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15", "h16", "h17", @@ -224,27 +297,47 @@ const char* Simulator::vreg_names[] = {"v0", "v1", "v2", "v3", "v4", "v5", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"}; +const char* Simulator::zreg_names[] = {"z0", "z1", "z2", "z3", "z4", "z5", + "z6", "z7", "z8", "z9", "z10", "z11", + "z12", "z13", "z14", "z15", "z16", "z17", + "z18", "z19", "z20", "z21", "z22", "z23", + "z24", "z25", "z26", "z27", "z28", "z29", + "z30", "z31"}; + +const char* Simulator::preg_names[] = {"p0", "p1", "p2", "p3", "p4", "p5", + "p6", "p7", "p8", "p9", "p10", "p11", + "p12", "p13", "p14", "p15"}; +// clang-format on + const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) { - VIXL_ASSERT(code < kNumberOfRegisters); // If the code represents the stack pointer, index the name after zr. - if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) { + if ((code == kSPRegInternalCode) || + ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) { code = kZeroRegCode + 1; } + VIXL_ASSERT(code < ArrayLength(wreg_names)); return wreg_names[code]; } const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) { - VIXL_ASSERT(code < kNumberOfRegisters); // If the code represents the stack pointer, index the name after zr. 
- if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) { + if ((code == kSPRegInternalCode) || + ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) { code = kZeroRegCode + 1; } + VIXL_ASSERT(code < ArrayLength(xreg_names)); return xreg_names[code]; } +const char* Simulator::BRegNameForCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return breg_names[code]; +} + + const char* Simulator::HRegNameForCode(unsigned code) { VIXL_ASSERT(code < kNumberOfVRegisters); return hreg_names[code]; @@ -269,6 +362,39 @@ const char* Simulator::VRegNameForCode(unsigned code) { } +const char* Simulator::ZRegNameForCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfZRegisters); + return zreg_names[code]; +} + + +const char* Simulator::PRegNameForCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfPRegisters); + return preg_names[code]; +} + +SimVRegister Simulator::ExpandToSimVRegister(const SimPRegister& pg) { + SimVRegister ones, result; + dup_immediate(kFormatVnB, ones, 0xff); + mov_zeroing(kFormatVnB, result, pg, ones); + return result; +} + +void Simulator::ExtractFromSimVRegister(VectorFormat vform, + SimPRegister& pd, + SimVRegister vreg) { + SimVRegister zero; + dup_immediate(kFormatVnB, zero, 0); + SVEIntCompareVectorsHelper(ne, + vform, + pd, + GetPTrue(), + vreg, + zero, + false, + LeaveFlags); +} + #define COLOUR(colour_code) "\033[0;" colour_code "m" #define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m" #define COLOUR_HIGHLIGHT "\033[43m" @@ -291,6 +417,8 @@ void Simulator::SetColouredTrace(bool value) { clr_reg_value = value ? COLOUR(CYAN) : ""; clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : ""; clr_vreg_value = value ? COLOUR(MAGENTA) : ""; + clr_preg_name = value ? COLOUR_BOLD(GREEN) : ""; + clr_preg_value = value ? COLOUR(GREEN) : ""; clr_memory_address = value ? COLOUR_BOLD(BLUE) : ""; clr_warning = value ? COLOUR_BOLD(YELLOW) : ""; clr_warning_message = value ? COLOUR(YELLOW) : ""; @@ -322,22 +450,6 @@ void Simulator::SetTraceParameters(int parameters) { } -void Simulator::SetInstructionStats(bool value) { - if (value != instruction_stats_) { - if (value) { - if (instrumentation_ == NULL) { - // Set the sample period to 10, as the VIXL examples and tests are - // short. - instrumentation_ = new Instrument("vixl_stats.csv", 10); - } - decoder_->AppendVisitor(instrumentation_); - } else if (instrumentation_ != NULL) { - decoder_->RemoveVisitor(instrumentation_); - } - instruction_stats_ = value; - } -} - // Helpers --------------------------------------------------------------------- uint64_t Simulator::AddWithCarry(unsigned reg_size, bool set_flags, @@ -379,44 +491,50 @@ uint64_t Simulator::AddWithCarry(unsigned reg_size, int64_t Simulator::ShiftOperand(unsigned reg_size, - int64_t value, + uint64_t uvalue, Shift shift_type, unsigned amount) const { - VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); - if (amount == 0) { - return value; - } - uint64_t uvalue = static_cast<uint64_t>(value); - uint64_t mask = kWRegMask; - bool is_negative = (uvalue & kWSignMask) != 0; - if (reg_size == kXRegSize) { - mask = kXRegMask; - is_negative = (uvalue & kXSignMask) != 0; - } - - switch (shift_type) { - case LSL: - uvalue <<= amount; - break; - case LSR: - uvalue >>= amount; - break; - case ASR: - uvalue >>= amount; - if (is_negative) { - // Simulate sign-extension to 64 bits. 
- uvalue |= ~UINT64_C(0) << (reg_size - amount);
+ VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
+ (reg_size == kSRegSize) || (reg_size == kDRegSize));
+ if (amount > 0) {
+ uint64_t mask = GetUintMask(reg_size);
+ bool is_negative = (uvalue & GetSignMask(reg_size)) != 0;
+ // The behavior is undefined in C++ if the shift amount is greater than
+ // or equal to the register lane size. Work out the shifted result based
+ // on architectural behavior before performing the C++ shift operations.
+ switch (shift_type) {
+ case LSL:
+ if (amount >= reg_size) {
+ return UINT64_C(0);
+ }
+ uvalue <<= amount;
+ break;
+ case LSR:
+ if (amount >= reg_size) {
+ return UINT64_C(0);
+ }
+ uvalue >>= amount;
+ break;
+ case ASR:
+ if (amount >= reg_size) {
+ return is_negative ? ~UINT64_C(0) : UINT64_C(0);
+ }
+ uvalue >>= amount;
+ if (is_negative) {
+ // Simulate sign-extension to 64 bits.
+ uvalue |= ~UINT64_C(0) << (reg_size - amount);
+ }
+ break;
+ case ROR: {
+ uvalue = RotateRight(uvalue, amount, reg_size);
+ break;
}
- break;
- case ROR: {
- uvalue = RotateRight(uvalue, amount, reg_size);
- break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ return 0;
}
- default:
- VIXL_UNIMPLEMENTED();
- return 0;
+ uvalue &= mask;
}
- uvalue &= mask;
int64_t result;
memcpy(&result, &uvalue, sizeof(result));
@@ -592,6 +710,15 @@ Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
return kPrintReg1S;
case kFormatD:
return kPrintReg1D;
+
+ case kFormatVnB:
+ return kPrintRegVnB;
+ case kFormatVnH:
+ return kPrintRegVnH;
+ case kFormatVnS:
+ return kPrintRegVnS;
+ case kFormatVnD:
+ return kPrintRegVnD;
}
}
@@ -623,301 +750,445 @@ Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatFP(
}
}
-
-void Simulator::PrintWrittenRegisters() {
+void Simulator::PrintRegisters() {
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
- if (registers_[i].WrittenSinceLastLog()) PrintRegister(i);
+ if (i == kSpRegCode) i = kSPRegInternalCode;
+ PrintRegister(i);
}
}
-
-void Simulator::PrintWrittenVRegisters() {
+void Simulator::PrintVRegisters() {
for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
- // At this point there is no type information, so print as a raw 1Q.
- if (vregisters_[i].WrittenSinceLastLog()) PrintVRegister(i, kPrintReg1Q);
+ PrintVRegister(i);
}
}
-
-void Simulator::PrintSystemRegisters() {
- PrintSystemRegister(NZCV);
- PrintSystemRegister(FPCR);
+void Simulator::PrintZRegisters() {
+ for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
+ PrintZRegister(i);
+ }
}
-
-void Simulator::PrintRegisters() {
+void Simulator::PrintWrittenRegisters() {
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
- PrintRegister(i);
+ if (registers_[i].WrittenSinceLastLog()) {
+ if (i == kSpRegCode) i = kSPRegInternalCode;
+ PrintRegister(i);
+ }
}
}
-
-void Simulator::PrintVRegisters() {
+void Simulator::PrintWrittenVRegisters() {
+ bool has_sve = GetCPUFeatures()->Has(CPUFeatures::kSVE);
for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
- // At this point there is no type information, so print as a raw 1Q.
- PrintVRegister(i, kPrintReg1Q);
+ if (vregisters_[i].WrittenSinceLastLog()) {
+ // Z registers are initialised in the constructor before the user can
+ // configure the CPU features, so we must also check for SVE here.
+ if (vregisters_[i].AccessedAsZSinceLastLog() && has_sve) {
+ PrintZRegister(i);
+ } else {
+ PrintVRegister(i);
+ }
+ }
}
}
-
-// Print a register's name and raw value.
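// A minimal sketch of the hazard that the guards in ShiftOperand (above)
// avoid; the variable names here are illustrative. AArch64 defines LSL #64
// on an X-sized lane to produce zero, but the equivalent C++ shift is
// undefined behaviour, so the result must be computed without evaluating
// the shift operator:
//
//   uint64_t v = 0x1234;
//   unsigned amount = 64;
//   // v <<= amount;  // Undefined: shift count >= width of uint64_t.
//   v = (amount >= 64) ? UINT64_C(0) : (v << amount);  // Architecturally 0.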
-// -// Only the least-significant `size_in_bytes` bytes of the register are printed, -// but the value is aligned as if the whole register had been printed. -// -// For typical register updates, size_in_bytes should be set to kXRegSizeInBytes -// -- the default -- so that the whole register is printed. Other values of -// size_in_bytes are intended for use when the register hasn't actually been -// updated (such as in PrintWrite). -// -// No newline is printed. This allows the caller to print more details (such as -// a memory access annotation). -void Simulator::PrintRegisterRawHelper(unsigned code, - Reg31Mode r31mode, - int size_in_bytes) { - // The template for all supported sizes. - // "# x{code}: 0xffeeddccbbaa9988" - // "# w{code}: 0xbbaa9988" - // "# w{code}<15:0>: 0x9988" - // "# w{code}<7:0>: 0x88" - unsigned padding_chars = (kXRegSizeInBytes - size_in_bytes) * 2; - - const char* name = ""; - const char* suffix = ""; - switch (size_in_bytes) { - case kXRegSizeInBytes: - name = XRegNameForCode(code, r31mode); - break; - case kWRegSizeInBytes: - name = WRegNameForCode(code, r31mode); - break; - case 2: - name = WRegNameForCode(code, r31mode); - suffix = "<15:0>"; - padding_chars -= strlen(suffix); - break; - case 1: - name = WRegNameForCode(code, r31mode); - suffix = "<7:0>"; - padding_chars -= strlen(suffix); - break; - default: - VIXL_UNREACHABLE(); - } - fprintf(stream_, "# %s%5s%s: ", clr_reg_name, name, suffix); - - // Print leading padding spaces. - VIXL_ASSERT(padding_chars < (kXRegSizeInBytes * 2)); - for (unsigned i = 0; i < padding_chars; i++) { - putc(' ', stream_); +void Simulator::PrintWrittenPRegisters() { + // P registers are initialised in the constructor before the user can + // configure the CPU features, so we must check for SVE here. + if (!GetCPUFeatures()->Has(CPUFeatures::kSVE)) return; + for (unsigned i = 0; i < kNumberOfPRegisters; i++) { + if (pregisters_[i].WrittenSinceLastLog()) { + PrintPRegister(i); + } } - - // Print the specified bits in hexadecimal format. - uint64_t bits = ReadRegister<uint64_t>(code, r31mode); - bits &= kXRegMask >> ((kXRegSizeInBytes - size_in_bytes) * 8); - VIXL_STATIC_ASSERT(sizeof(bits) == kXRegSizeInBytes); - - int chars = size_in_bytes * 2; - fprintf(stream_, - "%s0x%0*" PRIx64 "%s", - clr_reg_value, - chars, - bits, - clr_normal); + if (ReadFFR().WrittenSinceLastLog()) PrintFFR(); } - -void Simulator::PrintRegister(unsigned code, Reg31Mode r31mode) { - registers_[code].NotifyRegisterLogged(); - - // Don't print writes into xzr. - if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) { - return; - } - - // The template for all x and w registers: - // "# x{code}: 0x{value}" - // "# w{code}: 0x{value}" - - PrintRegisterRawHelper(code, r31mode); - fprintf(stream_, "\n"); +void Simulator::PrintSystemRegisters() { + PrintSystemRegister(NZCV); + PrintSystemRegister(FPCR); } - -// Print a register's name and raw value. -// -// The `bytes` and `lsb` arguments can be used to limit the bytes that are -// printed. These arguments are intended for use in cases where register hasn't -// actually been updated (such as in PrintVWrite). -// -// No newline is printed. This allows the caller to print more details (such as -// a floating-point interpretation or a memory access annotation). -void Simulator::PrintVRegisterRawHelper(unsigned code, int bytes, int lsb) { - // The template for vector types: - // "# v{code}: 0xffeeddccbbaa99887766554433221100". - // An example with bytes=4 and lsb=8: - // "# v{code}: 0xbbaa9988 ". 
- fprintf(stream_, - "# %s%5s: %s", - clr_vreg_name, - VRegNameForCode(code), - clr_vreg_value); - - int msb = lsb + bytes - 1; - int byte = kQRegSizeInBytes - 1; - - // Print leading padding spaces. (Two spaces per byte.) - while (byte > msb) { +void Simulator::PrintRegisterValue(const uint8_t* value, + int value_size, + PrintRegisterFormat format) { + int print_width = GetPrintRegSizeInBytes(format); + VIXL_ASSERT(print_width <= value_size); + for (int i = value_size - 1; i >= print_width; i--) { + // Pad with spaces so that values align vertically. fprintf(stream_, " "); - byte--; + // If we aren't explicitly printing a partial value, ensure that the + // unprinted bits are zero. + VIXL_ASSERT(((format & kPrintRegPartial) != 0) || (value[i] == 0)); } - - // Print the specified part of the value, byte by byte. - qreg_t rawbits = ReadQRegister(code); fprintf(stream_, "0x"); - while (byte >= lsb) { - fprintf(stream_, "%02x", rawbits.val[byte]); - byte--; + for (int i = print_width - 1; i >= 0; i--) { + fprintf(stream_, "%02x", value[i]); } +} - // Print trailing padding spaces. - while (byte >= 0) { - fprintf(stream_, " "); - byte--; +void Simulator::PrintRegisterValueFPAnnotations(const uint8_t* value, + uint16_t lane_mask, + PrintRegisterFormat format) { + VIXL_ASSERT((format & kPrintRegAsFP) != 0); + int lane_size = GetPrintRegLaneSizeInBytes(format); + fprintf(stream_, " ("); + bool last_inactive = false; + const char* sep = ""; + for (int i = GetPrintRegLaneCount(format) - 1; i >= 0; i--, sep = ", ") { + bool access = (lane_mask & (1 << (i * lane_size))) != 0; + if (access) { + // Read the lane as a double, so we can format all FP types in the same + // way. We squash NaNs, and a double can exactly represent any other value + // that the smaller types can represent, so this is lossless. + double element; + switch (lane_size) { + case kHRegSizeInBytes: { + Float16 element_fp16; + VIXL_STATIC_ASSERT(sizeof(element_fp16) == kHRegSizeInBytes); + memcpy(&element_fp16, &value[i * lane_size], sizeof(element_fp16)); + element = FPToDouble(element_fp16, kUseDefaultNaN); + break; + } + case kSRegSizeInBytes: { + float element_fp32; + memcpy(&element_fp32, &value[i * lane_size], sizeof(element_fp32)); + element = static_cast<double>(element_fp32); + break; + } + case kDRegSizeInBytes: { + memcpy(&element, &value[i * lane_size], sizeof(element)); + break; + } + default: + VIXL_UNREACHABLE(); + fprintf(stream_, "{UnknownFPValue}"); + continue; + } + if (IsNaN(element)) { + // The fprintf behaviour for NaNs is implementation-defined. Always + // print "nan", so that traces are consistent. + fprintf(stream_, "%s%snan%s", sep, clr_vreg_value, clr_normal); + } else { + fprintf(stream_, + "%s%s%#.4g%s", + sep, + clr_vreg_value, + element, + clr_normal); + } + last_inactive = false; + } else if (!last_inactive) { + // Replace each contiguous sequence of inactive lanes with "...". + fprintf(stream_, "%s...", sep); + last_inactive = true; + } } - fprintf(stream_, "%s", clr_normal); + fprintf(stream_, ")"); } +void Simulator::PrintRegister(int code, + PrintRegisterFormat format, + const char* suffix) { + VIXL_ASSERT((static_cast<unsigned>(code) < kNumberOfRegisters) || + (static_cast<unsigned>(code) == kSPRegInternalCode)); + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsScalar); + VIXL_ASSERT((format & kPrintRegAsFP) == 0); -// Print each of the specified lanes of a register as a float or double value. 
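// (A concrete instance of the lossless widening used by
// PrintRegisterValueFPAnnotations above, with illustrative values: FP16 1.5
// is encoded as 0x3e00, and converting it to a double yields exactly 1.5,
// since double's 52-bit significand can represent every value of FP16's
// 10-bit significand without rounding.)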
-//
-// The `lane_count` and `lslane` arguments can be used to limit the lanes that
-// are printed. These arguments are intended for use in cases where register
-// hasn't actually been updated (such as in PrintVWrite).
-//
-// No newline is printed. This allows the caller to print more details (such as
-// a memory access annotation).
-void Simulator::PrintVRegisterFPHelper(unsigned code,
- unsigned lane_size_in_bytes,
- int lane_count,
- int rightmost_lane) {
- VIXL_ASSERT((lane_size_in_bytes == kHRegSizeInBytes) ||
- (lane_size_in_bytes == kSRegSizeInBytes) ||
- (lane_size_in_bytes == kDRegSizeInBytes));
-
- unsigned msb = ((lane_count + rightmost_lane) * lane_size_in_bytes);
- VIXL_ASSERT(msb <= kQRegSizeInBytes);
-
- // For scalar types ((lane_count == 1) && (rightmost_lane == 0)), a register
- // name is used:
- // " (h{code}: {value})"
- // " (s{code}: {value})"
- // " (d{code}: {value})"
- // For vector types, "..." is used to represent one or more omitted lanes.
- // " (..., {value}, {value}, ...)"
- if (lane_size_in_bytes == kHRegSizeInBytes) {
- // TODO: Trace tests will fail until we regenerate them.
- return;
- }
- if ((lane_count == 1) && (rightmost_lane == 0)) {
- const char* name;
- switch (lane_size_in_bytes) {
- case kHRegSizeInBytes:
- name = HRegNameForCode(code);
+ SimRegister* reg;
+ SimRegister zero;
+ if (code == kZeroRegCode) {
+ reg = &zero;
+ } else {
+ // registers_[31] holds the SP.
+ VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
+ reg = &registers_[code % kNumberOfRegisters];
+ }
+
+ // We trace register writes as whole register values, implying that any
+ // unprinted bits are all zero:
+ // "# x{code}: 0x{-----value----}"
+ // "# w{code}: 0x{-value}"
+ // Stores trace partial register values, implying nothing about the unprinted
+ // bits:
+ // "# x{code}<63:0>: 0x{-----value----}"
+ // "# x{code}<31:0>: 0x{-value}"
+ // "# x{code}<15:0>: 0x{--}"
+ // "# x{code}<7:0>: 0x{}"
+
+ bool is_partial = (format & kPrintRegPartial) != 0;
+ unsigned print_reg_size = GetPrintRegSizeInBits(format);
+ std::stringstream name;
+ if (is_partial) {
+ name << XRegNameForCode(code) << GetPartialRegSuffix(format);
+ } else {
+ // Notify the register that it has been logged, but only if we're printing
+ // all of it.
+ reg->NotifyRegisterLogged();
+ switch (print_reg_size) {
+ case kWRegSize:
+ name << WRegNameForCode(code);
break;
- case kSRegSizeInBytes:
- name = SRegNameForCode(code);
- break;
- case kDRegSizeInBytes:
- name = DRegNameForCode(code);
+ case kXRegSize:
+ name << XRegNameForCode(code);
break;
default:
- name = NULL;
VIXL_UNREACHABLE();
- }
- fprintf(stream_, " (%s%s: ", clr_vreg_name, name);
- } else {
- if (msb < (kQRegSizeInBytes - 1)) {
- fprintf(stream_, " (..., ");
- } else {
- fprintf(stream_, " (");
+ return;
}
}
- // Print the list of values.
- const char* separator = ""; - int leftmost_lane = rightmost_lane + lane_count - 1; - for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) { - double value; - switch (lane_size_in_bytes) { - case kHRegSizeInBytes: - value = ReadVRegister(code).GetLane<uint16_t>(lane); + fprintf(stream_, + "# %s%*s: %s", + clr_reg_name, + kPrintRegisterNameFieldWidth, + name.str().c_str(), + clr_reg_value); + PrintRegisterValue(*reg, format); + fprintf(stream_, "%s%s", clr_normal, suffix); +} + +void Simulator::PrintVRegister(int code, + PrintRegisterFormat format, + const char* suffix) { + VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfVRegisters); + VIXL_ASSERT(((format & kPrintRegAsVectorMask) == kPrintRegAsScalar) || + ((format & kPrintRegAsVectorMask) == kPrintRegAsDVector) || + ((format & kPrintRegAsVectorMask) == kPrintRegAsQVector)); + + // We trace register writes as whole register values, implying that any + // unprinted bits are all zero: + // "# v{code}: 0x{-------------value------------}" + // "# d{code}: 0x{-----value----}" + // "# s{code}: 0x{-value}" + // "# h{code}: 0x{--}" + // "# b{code}: 0x{}" + // Stores trace partial register values, implying nothing about the unprinted + // bits: + // "# v{code}<127:0>: 0x{-------------value------------}" + // "# v{code}<63:0>: 0x{-----value----}" + // "# v{code}<31:0>: 0x{-value}" + // "# v{code}<15:0>: 0x{--}" + // "# v{code}<7:0>: 0x{}" + + bool is_partial = ((format & kPrintRegPartial) != 0); + std::stringstream name; + unsigned print_reg_size = GetPrintRegSizeInBits(format); + if (is_partial) { + name << VRegNameForCode(code) << GetPartialRegSuffix(format); + } else { + // Notify the register that it has been logged, but only if we're printing + // all of it. + vregisters_[code].NotifyRegisterLogged(); + switch (print_reg_size) { + case kBRegSize: + name << BRegNameForCode(code); break; - case kSRegSizeInBytes: - value = ReadVRegister(code).GetLane<float>(lane); + case kHRegSize: + name << HRegNameForCode(code); break; - case kDRegSizeInBytes: - value = ReadVRegister(code).GetLane<double>(lane); + case kSRegSize: + name << SRegNameForCode(code); + break; + case kDRegSize: + name << DRegNameForCode(code); + break; + case kQRegSize: + name << VRegNameForCode(code); break; default: - value = 0.0; VIXL_UNREACHABLE(); + return; } - if (IsNaN(value)) { - // The output for NaNs is implementation defined. Always print `nan`, so - // that traces are coherent across different implementations. - fprintf(stream_, "%s%snan%s", separator, clr_vreg_value, clr_normal); - } else { - fprintf(stream_, - "%s%s%#g%s", - separator, - clr_vreg_value, - value, - clr_normal); - } - separator = ", "; } - if (rightmost_lane > 0) { - fprintf(stream_, ", ..."); + fprintf(stream_, + "# %s%*s: %s", + clr_vreg_name, + kPrintRegisterNameFieldWidth, + name.str().c_str(), + clr_vreg_value); + PrintRegisterValue(vregisters_[code], format); + fprintf(stream_, "%s", clr_normal); + if ((format & kPrintRegAsFP) != 0) { + PrintRegisterValueFPAnnotations(vregisters_[code], format); + } + fprintf(stream_, "%s", suffix); +} + +void Simulator::PrintVRegistersForStructuredAccess(int rt_code, + int reg_count, + uint16_t focus_mask, + PrintRegisterFormat format) { + bool print_fp = (format & kPrintRegAsFP) != 0; + // Suppress FP formatting, so we can specify the lanes we're interested in. 
+ PrintRegisterFormat format_no_fp = + static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP); + + for (int r = 0; r < reg_count; r++) { + int code = (rt_code + r) % kNumberOfVRegisters; + PrintVRegister(code, format_no_fp, ""); + if (print_fp) { + PrintRegisterValueFPAnnotations(vregisters_[code], focus_mask, format); + } + fprintf(stream_, "\n"); } - fprintf(stream_, ")"); } +void Simulator::PrintZRegistersForStructuredAccess(int rt_code, + int q_index, + int reg_count, + uint16_t focus_mask, + PrintRegisterFormat format) { + bool print_fp = (format & kPrintRegAsFP) != 0; + // Suppress FP formatting, so we can specify the lanes we're interested in. + PrintRegisterFormat format_no_fp = + static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP); + + PrintRegisterFormat format_q = GetPrintRegAsQChunkOfSVE(format); + + const unsigned size = kQRegSizeInBytes; + unsigned byte_index = q_index * size; + const uint8_t* value = vregisters_[rt_code].GetBytes() + byte_index; + VIXL_ASSERT((byte_index + size) <= vregisters_[rt_code].GetSizeInBytes()); + + for (int r = 0; r < reg_count; r++) { + int code = (rt_code + r) % kNumberOfZRegisters; + PrintPartialZRegister(code, q_index, format_no_fp, ""); + if (print_fp) { + PrintRegisterValueFPAnnotations(value, focus_mask, format_q); + } + fprintf(stream_, "\n"); + } +} -void Simulator::PrintVRegister(unsigned code, PrintRegisterFormat format) { +void Simulator::PrintZRegister(int code, PrintRegisterFormat format) { + // We're going to print the register in parts, so force a partial format. + format = GetPrintRegPartial(format); + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + for (unsigned i = 0; i < (vl / kQRegSize); i++) { + PrintPartialZRegister(code, i, format); + } vregisters_[code].NotifyRegisterLogged(); +} - int lane_size_log2 = format & kPrintRegLaneSizeMask; +void Simulator::PrintPRegister(int code, PrintRegisterFormat format) { + // We're going to print the register in parts, so force a partial format. + format = GetPrintRegPartial(format); + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + for (unsigned i = 0; i < (vl / kQRegSize); i++) { + PrintPartialPRegister(code, i, format); + } + pregisters_[code].NotifyRegisterLogged(); +} - int reg_size_log2; - if (format & kPrintRegAsQVector) { - reg_size_log2 = kQRegSizeInBytesLog2; - } else if (format & kPrintRegAsDVector) { - reg_size_log2 = kDRegSizeInBytesLog2; - } else { - // Scalar types. - reg_size_log2 = lane_size_log2; +void Simulator::PrintFFR(PrintRegisterFormat format) { + // We're going to print the register in parts, so force a partial format. 
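// (With a 512-bit vector length, for example, that means four 16-bit
// predicate chunks, traced as FFR<15:0>, FFR<31:16>, FFR<47:32> and
// FFR<63:48>; an illustrative instance of the Q-sized chunking used above.)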
+ format = GetPrintRegPartial(format); + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + SimPRegister& ffr = ReadFFR(); + for (unsigned i = 0; i < (vl / kQRegSize); i++) { + PrintPartialPRegister("FFR", ffr, i, format); } + ffr.NotifyRegisterLogged(); +} + +void Simulator::PrintPartialZRegister(int code, + int q_index, + PrintRegisterFormat format, + const char* suffix) { + VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfZRegisters); + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + VIXL_ASSERT((format & kPrintRegPartial) != 0); + VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits()); - int lane_count = 1 << (reg_size_log2 - lane_size_log2); - int lane_size = 1 << lane_size_log2; + // We _only_ trace partial Z register values in Q-sized chunks, because + // they're often too large to reasonably fit on a single line. Each line + // implies nothing about the unprinted bits. + // "# z{code}<127:0>: 0x{-------------value------------}" - // The template for vector types: - // "# v{code}: 0x{rawbits} (..., {value}, ...)". - // The template for scalar types: - // "# v{code}: 0x{rawbits} ({reg}:{value})". - // The values in parentheses after the bit representations are floating-point - // interpretations. They are displayed only if the kPrintVRegAsFP bit is set. + format = GetPrintRegAsQChunkOfSVE(format); - PrintVRegisterRawHelper(code); - if (format & kPrintRegAsFP) { - PrintVRegisterFPHelper(code, lane_size, lane_count); + const unsigned size = kQRegSizeInBytes; + unsigned byte_index = q_index * size; + const uint8_t* value = vregisters_[code].GetBytes() + byte_index; + VIXL_ASSERT((byte_index + size) <= vregisters_[code].GetSizeInBytes()); + + int lsb = q_index * kQRegSize; + int msb = lsb + kQRegSize - 1; + std::stringstream name; + name << ZRegNameForCode(code) << '<' << msb << ':' << lsb << '>'; + + fprintf(stream_, + "# %s%*s: %s", + clr_vreg_name, + kPrintRegisterNameFieldWidth, + name.str().c_str(), + clr_vreg_value); + PrintRegisterValue(value, size, format); + fprintf(stream_, "%s", clr_normal); + if ((format & kPrintRegAsFP) != 0) { + PrintRegisterValueFPAnnotations(value, GetPrintRegLaneMask(format), format); } + fprintf(stream_, "%s", suffix); +} + +void Simulator::PrintPartialPRegister(const char* name, + const SimPRegister& reg, + int q_index, + PrintRegisterFormat format, + const char* suffix) { + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + VIXL_ASSERT((format & kPrintRegPartial) != 0); + VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits()); - fprintf(stream_, "\n"); + // We don't currently use the format for anything here. + USE(format); + + // We _only_ trace partial P register values, because they're often too large + // to reasonably fit on a single line. Each line implies nothing about the + // unprinted bits. + // + // We print values in binary, with spaces between each bit, in order for the + // bits to align with the Z register bytes that they predicate. 
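// (For example, with S-sized elements each predicate lane occupies four
// bits and only the lowest is significant, so an all-active 16-bit chunk
// of p1 would trace as "0b 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1"; illustrative.)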
+ // "# {name}<15:0>: 0b{-------------value------------}" + + int print_size_in_bits = kQRegSize / kZRegBitsPerPRegBit; + int lsb = q_index * print_size_in_bits; + int msb = lsb + print_size_in_bits - 1; + std::stringstream prefix; + prefix << name << '<' << msb << ':' << lsb << '>'; + + fprintf(stream_, + "# %s%*s: %s0b", + clr_preg_name, + kPrintRegisterNameFieldWidth, + prefix.str().c_str(), + clr_preg_value); + for (int i = msb; i >= lsb; i--) { + fprintf(stream_, " %c", reg.GetBit(i) ? '1' : '0'); + } + fprintf(stream_, "%s%s", clr_normal, suffix); } +void Simulator::PrintPartialPRegister(int code, + int q_index, + PrintRegisterFormat format, + const char* suffix) { + VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfPRegisters); + PrintPartialPRegister(PRegNameForCode(code), + pregisters_[code], + q_index, + format, + suffix); +} void Simulator::PrintSystemRegister(SystemRegister id) { switch (id) { @@ -954,90 +1225,347 @@ void Simulator::PrintSystemRegister(SystemRegister id) { } } - -void Simulator::PrintRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format) { - registers_[reg_code].NotifyRegisterLogged(); - - USE(format); - - // The template is "# {reg}: 0x{value} <- {address}". - PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister); +uint16_t Simulator::PrintPartialAccess(uint16_t access_mask, + uint16_t future_access_mask, + int struct_element_count, + int lane_size_in_bytes, + const char* op, + uintptr_t address, + int reg_size_in_bytes) { + // We want to assume that we'll access at least one lane. + VIXL_ASSERT(access_mask != 0); + VIXL_ASSERT((reg_size_in_bytes == kXRegSizeInBytes) || + (reg_size_in_bytes == kQRegSizeInBytes)); + bool started_annotation = false; + // Indent to match the register field, the fixed formatting, and the value + // prefix ("0x"): "# {name}: 0x" + fprintf(stream_, "# %*s ", kPrintRegisterNameFieldWidth, ""); + // First, annotate the lanes (byte by byte). + for (int lane = reg_size_in_bytes - 1; lane >= 0; lane--) { + bool access = (access_mask & (1 << lane)) != 0; + bool future = (future_access_mask & (1 << lane)) != 0; + if (started_annotation) { + // If we've started an annotation, draw a horizontal line in addition to + // any other symbols. + if (access) { + fprintf(stream_, "─╨"); + } else if (future) { + fprintf(stream_, "─║"); + } else { + fprintf(stream_, "──"); + } + } else { + if (access) { + started_annotation = true; + fprintf(stream_, " ╙"); + } else if (future) { + fprintf(stream_, " ║"); + } else { + fprintf(stream_, " "); + } + } + } + VIXL_ASSERT(started_annotation); + fprintf(stream_, "─ 0x"); + int lane_size_in_nibbles = lane_size_in_bytes * 2; + // Print the most-significant struct element first. + const char* sep = ""; + for (int i = struct_element_count - 1; i >= 0; i--) { + int offset = lane_size_in_bytes * i; + uint64_t nibble = Memory::Read(lane_size_in_bytes, address + offset); + fprintf(stream_, "%s%0*" PRIx64, sep, lane_size_in_nibbles, nibble); + sep = "'"; + } fprintf(stream_, - " <- %s0x%016" PRIxPTR "%s\n", + " %s %s0x%016" PRIxPTR "%s\n", + op, clr_memory_address, address, clr_normal); + return future_access_mask & ~access_mask; } - -void Simulator::PrintVRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane) { - vregisters_[reg_code].NotifyRegisterLogged(); - - // The template is "# v{code}: 0x{rawbits} <- address". 
- PrintVRegisterRawHelper(reg_code); - if (format & kPrintRegAsFP) { - PrintVRegisterFPHelper(reg_code, - GetPrintRegLaneSizeInBytes(format), - GetPrintRegLaneCount(format), - lane); +void Simulator::PrintAccess(int code, + PrintRegisterFormat format, + const char* op, + uintptr_t address) { + VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + if ((format & kPrintRegPartial) == 0) { + registers_[code].NotifyRegisterLogged(); } + // Scalar-format accesses use a simple format: + // "# {reg}: 0x{value} -> {address}" + + // Suppress the newline, so the access annotation goes on the same line. + PrintRegister(code, format, ""); fprintf(stream_, - " <- %s0x%016" PRIxPTR "%s\n", + " %s %s0x%016" PRIxPTR "%s\n", + op, clr_memory_address, address, clr_normal); } +void Simulator::PrintVAccess(int code, + PrintRegisterFormat format, + const char* op, + uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); -void Simulator::PrintWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format) { - VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + // Scalar-format accesses use a simple format: + // "# v{code}: 0x{value} -> {address}" - // The template is "# v{code}: 0x{value} -> {address}". To keep the trace tidy - // and readable, the value is aligned with the values in the register trace. - PrintRegisterRawHelper(reg_code, - Reg31IsZeroRegister, - GetPrintRegSizeInBytes(format)); + // Suppress the newline, so the access annotation goes on the same line. + PrintVRegister(code, format, ""); fprintf(stream_, - " -> %s0x%016" PRIxPTR "%s\n", + " %s %s0x%016" PRIxPTR "%s\n", + op, clr_memory_address, address, clr_normal); } +void Simulator::PrintVStructAccess(int rt_code, + int reg_count, + PrintRegisterFormat format, + const char* op, + uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // For example: + // "# v{code}: 0x{value}" + // "# ...: 0x{value}" + // "# ║ ╙─ {struct_value} -> {lowest_address}" + // "# ╙───── {struct_value} -> {highest_address}" + + uint16_t lane_mask = GetPrintRegLaneMask(format); + PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format); + + int reg_size_in_bytes = GetPrintRegSizeInBytes(format); + int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format); + for (int i = 0; i < reg_size_in_bytes; i += lane_size_in_bytes) { + uint16_t access_mask = 1 << i; + VIXL_ASSERT((lane_mask & access_mask) != 0); + lane_mask = PrintPartialAccess(access_mask, + lane_mask, + reg_count, + lane_size_in_bytes, + op, + address + (i * reg_count)); + } +} + +void Simulator::PrintVSingleStructAccess(int rt_code, + int reg_count, + int lane, + PrintRegisterFormat format, + const char* op, + uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // For example: + // "# v{code}: 0x{value}" + // "# ...: 0x{value}" + // "# ╙───── {struct_value} -> {address}" + + int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format); + uint16_t lane_mask = 1 << (lane * lane_size_in_bytes); + PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format); + PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address); +} + +void Simulator::PrintVReplicatingStructAccess(int rt_code, + int reg_count, + PrintRegisterFormat format, + const char* op, + uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // For example: + // "# v{code}: 0x{value}" + // 
"# ...: 0x{value}" + // "# ╙─╨─╨─╨─ {struct_value} -> {address}" + + int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format); + uint16_t lane_mask = GetPrintRegLaneMask(format); + PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format); + PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address); +} + +void Simulator::PrintZAccess(int rt_code, const char* op, uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // Scalar-format accesses are split into separate chunks, each of which uses a + // simple format: + // "# z{code}<127:0>: 0x{value} -> {address}" + // "# z{code}<255:128>: 0x{value} -> {address + 16}" + // "# z{code}<383:256>: 0x{value} -> {address + 32}" + // etc + + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) { + // Suppress the newline, so the access annotation goes on the same line. + PrintPartialZRegister(rt_code, q_index, kPrintRegVnQPartial, ""); + fprintf(stream_, + " %s %s0x%016" PRIxPTR "%s\n", + op, + clr_memory_address, + address, + clr_normal); + address += kQRegSizeInBytes; + } +} + +void Simulator::PrintZStructAccess(int rt_code, + int reg_count, + const LogicPRegister& pg, + PrintRegisterFormat format, + int msize_in_bytes, + const char* op, + const LogicSVEAddressVector& addr) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // For example: + // "# z{code}<255:128>: 0x{value}" + // "# ...<255:128>: 0x{value}" + // "# ║ ╙─ {struct_value} -> {first_address}" + // "# ╙───── {struct_value} -> {last_address}" + + // We're going to print the register in parts, so force a partial format. + bool skip_inactive_chunks = (format & kPrintRegPartial) != 0; + format = GetPrintRegPartial(format); + + int esize_in_bytes = GetPrintRegLaneSizeInBytes(format); + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + int lanes_per_q = kQRegSizeInBytes / esize_in_bytes; + for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) { + uint16_t pred = + pg.GetActiveMask<uint16_t>(q_index) & GetPrintRegLaneMask(format); + if ((pred == 0) && skip_inactive_chunks) continue; + + PrintZRegistersForStructuredAccess(rt_code, + q_index, + reg_count, + pred, + format); + if (pred == 0) { + // This register chunk has no active lanes. The loop below would print + // nothing, so leave a blank line to keep structures grouped together. + fprintf(stream_, "#\n"); + continue; + } + for (int i = 0; i < lanes_per_q; i++) { + uint16_t access = 1 << (i * esize_in_bytes); + int lane = (q_index * lanes_per_q) + i; + // Skip inactive lanes. + if ((pred & access) == 0) continue; + pred = PrintPartialAccess(access, + pred, + reg_count, + msize_in_bytes, + op, + addr.GetStructAddress(lane)); + } + } -void Simulator::PrintVWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane) { - // The templates: - // "# v{code}: 0x{rawbits} -> {address}" - // "# v{code}: 0x{rawbits} (..., {value}, ...) -> {address}". - // "# v{code}: 0x{rawbits} ({reg}:{value}) -> {address}" - // Because this trace doesn't represent a change to the source register's - // value, only the relevant part of the value is printed. To keep the trace - // tidy and readable, the raw value is aligned with the other values in the - // register trace. 
- int lane_count = GetPrintRegLaneCount(format); - int lane_size = GetPrintRegLaneSizeInBytes(format); - int reg_size = GetPrintRegSizeInBytes(format); - PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane); - if (format & kPrintRegAsFP) { - PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane); + // We print the whole register, even for stores. + for (int i = 0; i < reg_count; i++) { + vregisters_[(rt_code + i) % kNumberOfZRegisters].NotifyRegisterLogged(); + } +} + +void Simulator::PrintPAccess(int code, const char* op, uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // Scalar-format accesses are split into separate chunks, each of which uses a + // simple format: + // "# p{code}<15:0>: 0b{value} -> {address}" + // "# p{code}<31:16>: 0b{value} -> {address + 2}" + // "# p{code}<47:32>: 0b{value} -> {address + 4}" + // etc + + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) { + // Suppress the newline, so the access annotation goes on the same line. + PrintPartialPRegister(code, q_index, kPrintRegVnQPartial, ""); + fprintf(stream_, + " %s %s0x%016" PRIxPTR "%s\n", + op, + clr_memory_address, + address, + clr_normal); + address += kQRegSizeInBytes; } - fprintf(stream_, - " -> %s0x%016" PRIxPTR "%s\n", - clr_memory_address, - address, - clr_normal); } +void Simulator::PrintRead(int rt_code, + PrintRegisterFormat format, + uintptr_t address) { + VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + registers_[rt_code].NotifyRegisterLogged(); + PrintAccess(rt_code, format, "<-", address); +} + +void Simulator::PrintExtendingRead(int rt_code, + PrintRegisterFormat format, + int access_size_in_bytes, + uintptr_t address) { + int reg_size_in_bytes = GetPrintRegSizeInBytes(format); + if (access_size_in_bytes == reg_size_in_bytes) { + // There is no extension here, so print a simple load. + PrintRead(rt_code, format, address); + return; + } + VIXL_ASSERT(access_size_in_bytes < reg_size_in_bytes); + + // For sign- and zero-extension, make it clear that the resulting register + // value is different from what is loaded from memory. + VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + registers_[rt_code].NotifyRegisterLogged(); + PrintRegister(rt_code, format); + PrintPartialAccess(1, + 0, + 1, + access_size_in_bytes, + "<-", + address, + kXRegSizeInBytes); +} + +void Simulator::PrintVRead(int rt_code, + PrintRegisterFormat format, + uintptr_t address) { + VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + vregisters_[rt_code].NotifyRegisterLogged(); + PrintVAccess(rt_code, format, "<-", address); +} + +void Simulator::PrintWrite(int rt_code, + PrintRegisterFormat format, + uintptr_t address) { + // Because this trace doesn't represent a change to the source register's + // value, only print the relevant part of the value. + format = GetPrintRegPartial(format); + VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + registers_[rt_code].NotifyRegisterLogged(); + PrintAccess(rt_code, format, "->", address); +} + +void Simulator::PrintVWrite(int rt_code, + PrintRegisterFormat format, + uintptr_t address) { + // Because this trace doesn't represent a change to the source register's + // value, only print the relevant part of the value. + format = GetPrintRegPartial(format); + // It only makes sense to write scalar values here. Vectors are handled by + // PrintVStructAccess. 
+ VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + PrintVAccess(rt_code, format, "->", address); +} void Simulator::PrintTakenBranch(const Instruction* target) { fprintf(stream_, @@ -1047,7 +1575,6 @@ void Simulator::PrintTakenBranch(const Instruction* target) { reinterpret_cast<uint64_t>(target)); } - // Visitors--------------------------------------------------------------------- @@ -1289,7 +1816,7 @@ void Simulator::VisitAddSubShifted(const Instruction* instr) { void Simulator::VisitAddSubImmediate(const Instruction* instr) { int64_t op2 = instr->GetImmAddSub() - << ((instr->GetShiftAddSub() == 1) ? 12 : 0); + << ((instr->GetImmAddSubShift() == 1) ? 12 : 0); AddSubHelper(instr, op2); } @@ -1489,7 +2016,7 @@ void Simulator::LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr) { // Approximate load-acquire by issuing a full barrier after the load. __sync_synchronize(); - LogRead(address, rt, GetPrintRegisterFormat(element_size)); + LogRead(rt, GetPrintRegisterFormat(element_size), address); } @@ -1516,7 +2043,7 @@ void Simulator::StoreReleaseUnscaledOffsetHelper(const Instruction* instr) { Memory::Write<T>(address, ReadRegister<T>(rt)); - LogWrite(address, rt, GetPrintRegisterFormat(element_size)); + LogWrite(rt, GetPrintRegisterFormat(element_size), address); } @@ -1603,7 +2130,7 @@ void Simulator::VisitLoadStorePAC(const Instruction* instr) { WriteXRegister(dst, Memory::Read<uint64_t>(addr_ptr), NoRegLog); unsigned access_size = 1 << 3; - LogRead(addr_ptr, dst, GetPrintRegisterFormatForSize(access_size)); + LogRead(dst, GetPrintRegisterFormatForSize(access_size), addr_ptr); } @@ -1624,49 +2151,65 @@ void Simulator::LoadStoreHelper(const Instruction* instr, unsigned srcdst = instr->GetRt(); uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode); + bool rt_is_vreg = false; + int extend_to_size = 0; LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask)); switch (op) { case LDRB_w: WriteWRegister(srcdst, Memory::Read<uint8_t>(address), NoRegLog); + extend_to_size = kWRegSizeInBytes; break; case LDRH_w: WriteWRegister(srcdst, Memory::Read<uint16_t>(address), NoRegLog); + extend_to_size = kWRegSizeInBytes; break; case LDR_w: WriteWRegister(srcdst, Memory::Read<uint32_t>(address), NoRegLog); + extend_to_size = kWRegSizeInBytes; break; case LDR_x: WriteXRegister(srcdst, Memory::Read<uint64_t>(address), NoRegLog); + extend_to_size = kXRegSizeInBytes; break; case LDRSB_w: WriteWRegister(srcdst, Memory::Read<int8_t>(address), NoRegLog); + extend_to_size = kWRegSizeInBytes; break; case LDRSH_w: WriteWRegister(srcdst, Memory::Read<int16_t>(address), NoRegLog); + extend_to_size = kWRegSizeInBytes; break; case LDRSB_x: WriteXRegister(srcdst, Memory::Read<int8_t>(address), NoRegLog); + extend_to_size = kXRegSizeInBytes; break; case LDRSH_x: WriteXRegister(srcdst, Memory::Read<int16_t>(address), NoRegLog); + extend_to_size = kXRegSizeInBytes; break; case LDRSW_x: WriteXRegister(srcdst, Memory::Read<int32_t>(address), NoRegLog); + extend_to_size = kXRegSizeInBytes; break; case LDR_b: WriteBRegister(srcdst, Memory::Read<uint8_t>(address), NoRegLog); + rt_is_vreg = true; break; case LDR_h: WriteHRegister(srcdst, Memory::Read<uint16_t>(address), NoRegLog); + rt_is_vreg = true; break; case LDR_s: WriteSRegister(srcdst, Memory::Read<float>(address), NoRegLog); + rt_is_vreg = true; break; case LDR_d: WriteDRegister(srcdst, Memory::Read<double>(address), NoRegLog); + rt_is_vreg = true; break; case LDR_q: WriteQRegister(srcdst, Memory::Read<qreg_t>(address), NoRegLog); + 
rt_is_vreg = true; break; case STRB_w: @@ -1683,18 +2226,23 @@ void Simulator::LoadStoreHelper(const Instruction* instr, break; case STR_b: Memory::Write<uint8_t>(address, ReadBRegister(srcdst)); + rt_is_vreg = true; break; case STR_h: Memory::Write<uint16_t>(address, ReadHRegisterBits(srcdst)); + rt_is_vreg = true; break; case STR_s: Memory::Write<float>(address, ReadSRegister(srcdst)); + rt_is_vreg = true; break; case STR_d: Memory::Write<double>(address, ReadDRegister(srcdst)); + rt_is_vreg = true; break; case STR_q: Memory::Write<qreg_t>(address, ReadQRegister(srcdst)); + rt_is_vreg = true; break; // Ignore prfm hint instructions. @@ -1705,22 +2253,25 @@ void Simulator::LoadStoreHelper(const Instruction* instr, VIXL_UNIMPLEMENTED(); } + // Print a detailed trace (including the memory address). + bool extend = (extend_to_size != 0); unsigned access_size = 1 << instr->GetSizeLS(); + unsigned result_size = extend ? extend_to_size : access_size; + PrintRegisterFormat print_format = + rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size) + : GetPrintRegisterFormatForSize(result_size); + if (instr->IsLoad()) { - if ((op == LDR_s) || (op == LDR_d)) { - LogVRead(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size)); - } else if ((op == LDR_b) || (op == LDR_h) || (op == LDR_q)) { - LogVRead(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + if (rt_is_vreg) { + LogVRead(srcdst, print_format, address); } else { - LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + LogExtendingRead(srcdst, print_format, access_size, address); } } else if (instr->IsStore()) { - if ((op == STR_s) || (op == STR_d)) { - LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size)); - } else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) { - LogVWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + if (rt_is_vreg) { + LogVWrite(srcdst, print_format, address); } else { - LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + LogWrite(srcdst, GetPrintRegisterFormatForSize(result_size), address); } } else { VIXL_ASSERT(op == PRFM); @@ -1765,6 +2316,8 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, // 'rt' and 'rt2' can only be aliased for stores. VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2)); + bool rt_is_vreg = false; + bool sign_extend = false; switch (op) { // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We // will print a more detailed log. 
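To illustrate the extending-read tracing above (a sketch; the register and address values are invented): "ldrb w0, [x1]" has access_size == 1 but extend_to_size == kWRegSizeInBytes, so the load is logged via LogExtendingRead, which prints the whole 32-bit register result and then annotates only the byte that was actually read, roughly:

#      w0: 0x00000080
#               ╙─ 0x80 <- 0x00007ffe12345678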
@@ -1776,6 +2329,7 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case LDP_s: { WriteSRegister(rt, Memory::Read<float>(address), NoRegLog); WriteSRegister(rt2, Memory::Read<float>(address2), NoRegLog); + rt_is_vreg = true; break; } case LDP_x: { @@ -1786,16 +2340,19 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case LDP_d: { WriteDRegister(rt, Memory::Read<double>(address), NoRegLog); WriteDRegister(rt2, Memory::Read<double>(address2), NoRegLog); + rt_is_vreg = true; break; } case LDP_q: { WriteQRegister(rt, Memory::Read<qreg_t>(address), NoRegLog); WriteQRegister(rt2, Memory::Read<qreg_t>(address2), NoRegLog); + rt_is_vreg = true; break; } case LDPSW_x: { WriteXRegister(rt, Memory::Read<int32_t>(address), NoRegLog); WriteXRegister(rt2, Memory::Read<int32_t>(address2), NoRegLog); + sign_extend = true; break; } case STP_w: { @@ -1806,6 +2363,7 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case STP_s: { Memory::Write<float>(address, ReadSRegister(rt)); Memory::Write<float>(address2, ReadSRegister(rt2)); + rt_is_vreg = true; break; } case STP_x: { @@ -1816,40 +2374,43 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case STP_d: { Memory::Write<double>(address, ReadDRegister(rt)); Memory::Write<double>(address2, ReadDRegister(rt2)); + rt_is_vreg = true; break; } case STP_q: { Memory::Write<qreg_t>(address, ReadQRegister(rt)); Memory::Write<qreg_t>(address2, ReadQRegister(rt2)); + rt_is_vreg = true; break; } default: VIXL_UNREACHABLE(); } - // Print a detailed trace (including the memory address) instead of the basic - // register:value trace generated by set_*reg(). + // Print a detailed trace (including the memory address). + unsigned result_size = sign_extend ? kXRegSizeInBytes : element_size; + PrintRegisterFormat print_format = + rt_is_vreg ? 
GetPrintRegisterFormatForSizeTryFP(result_size) + : GetPrintRegisterFormatForSize(result_size); + if (instr->IsLoad()) { - if ((op == LDP_s) || (op == LDP_d)) { - LogVRead(address, rt, GetPrintRegisterFormatForSizeFP(element_size)); - LogVRead(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size)); - } else if (op == LDP_q) { - LogVRead(address, rt, GetPrintRegisterFormatForSize(element_size)); - LogVRead(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + if (rt_is_vreg) { + LogVRead(rt, print_format, address); + LogVRead(rt2, print_format, address2); + } else if (sign_extend) { + LogExtendingRead(rt, print_format, element_size, address); + LogExtendingRead(rt2, print_format, element_size, address2); } else { - LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); - LogRead(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + LogRead(rt, print_format, address); + LogRead(rt2, print_format, address2); } } else { - if ((op == STP_s) || (op == STP_d)) { - LogVWrite(address, rt, GetPrintRegisterFormatForSizeFP(element_size)); - LogVWrite(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size)); - } else if (op == STP_q) { - LogVWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); - LogVWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + if (rt_is_vreg) { + LogVWrite(rt, print_format, address); + LogVWrite(rt2, print_format, address2); } else { - LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); - LogWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + LogWrite(rt, print_format, address); + LogWrite(rt2, print_format, address2); } } @@ -1890,10 +2451,10 @@ void Simulator::CompareAndSwapHelper(const Instruction* instr) { __sync_synchronize(); } Memory::Write<T>(address, newvalue); - LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogWrite(rt, GetPrintRegisterFormatForSize(element_size), address); } - WriteRegister<T>(rs, data); - LogRead(address, rs, GetPrintRegisterFormatForSize(element_size)); + WriteRegister<T>(rs, data, NoRegLog); + LogRead(rs, GetPrintRegisterFormatForSize(element_size), address); } @@ -1904,7 +2465,7 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) { unsigned rt = instr->GetRt(); unsigned rn = instr->GetRn(); - VIXL_ASSERT((rs % 2 == 0) && (rs % 2 == 0)); + VIXL_ASSERT((rs % 2 == 0) && (rt % 2 == 0)); unsigned element_size = sizeof(T); uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer); @@ -1925,8 +2486,8 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) { // associated with that location, even if the compare subsequently fails. local_monitor_.Clear(); - T data_high = Memory::Read<T>(address); - T data_low = Memory::Read<T>(address2); + T data_low = Memory::Read<T>(address); + T data_high = Memory::Read<T>(address2); if (is_acquire) { // Approximate load-acquire by issuing a full barrier after the load. 
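The next hunk adds CanReadMemory, which probes whether a host address range is readable by write()-ing it to a pipe. As a minimal, self-contained sketch of that technique (the helper name and error handling here are illustrative; POSIX write() reports an unreadable source buffer by returning -1 and setting errno to EFAULT, or EPERM on some kernels):

#include <errno.h>
#include <stddef.h>
#include <unistd.h>

// Probe readability of [addr, addr + size). Adequate for sizes below the
// pipe capacity; the simulator's helper below additionally handles short
// writes and drains the pipe so the probe can be reused.
static bool ProbeReadable(const void* addr, size_t size) {
  int fds[2];
  if (pipe(fds) != 0) return false;  // No pipe available; report unreadable.
  ssize_t result;
  do {
    result = write(fds[1], addr, size);
  } while ((result < 0) && (errno == EINTR));  // Retry if interrupted.
  bool readable = (result >= 0);
  close(fds[0]);
  close(fds[1]);
  return readable;
}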
@@ -1941,22 +2502,82 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) { __sync_synchronize(); } - Memory::Write<T>(address, newvalue_high); - Memory::Write<T>(address2, newvalue_low); + Memory::Write<T>(address, newvalue_low); + Memory::Write<T>(address2, newvalue_high); } - WriteRegister<T>(rs + 1, data_high); - WriteRegister<T>(rs, data_low); + WriteRegister<T>(rs + 1, data_high, NoRegLog); + WriteRegister<T>(rs, data_low, NoRegLog); - LogRead(address, rs + 1, GetPrintRegisterFormatForSize(element_size)); - LogRead(address2, rs, GetPrintRegisterFormatForSize(element_size)); + PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size); + LogRead(rs, format, address); + LogRead(rs + 1, format, address2); if (same) { - LogWrite(address, rt + 1, GetPrintRegisterFormatForSize(element_size)); - LogWrite(address2, rt, GetPrintRegisterFormatForSize(element_size)); + LogWrite(rt, format, address); + LogWrite(rt + 1, format, address2); + } +} + +bool Simulator::CanReadMemory(uintptr_t address, size_t size) { + // To simulate fault-tolerant loads, we need to know what host addresses we + // can access without generating a real fault. One way to do that is to + // attempt to `write()` the memory to a dummy pipe[1]. This is more portable + // and less intrusive than using (global) signal handlers. + // + // [1]: https://stackoverflow.com/questions/7134590 + + size_t written = 0; + bool can_read = true; + // `write` will normally return after one invocation, but it is allowed to + // handle only part of the operation, so wrap it in a loop. + while (can_read && (written < size)) { + ssize_t result = write(dummy_pipe_fd_[1], + reinterpret_cast<void*>(address + written), + size - written); + if (result > 0) { + written += result; + } else { + switch (result) { + case -EPERM: + case -EFAULT: + // The address range is not accessible. + // `write` is supposed to return -EFAULT in this case, but in practice + // it seems to return -EPERM, so we accept that too. + can_read = false; + break; + case -EINTR: + // The call was interrupted by a signal. Just try again. + break; + default: + // Any other error is fatal. + VIXL_ABORT(); + } + } + } + // Drain the read side of the pipe. If we don't do this, we'll leak memory as + // the dummy data is buffered. As before, we expect to drain the whole write + // in one invocation, but cannot guarantee that, so we wrap it in a loop. This + // function is primarily intended to implement SVE fault-tolerant loads, so + // the maximum Z register size is a good default buffer size. + char buffer[kZRegMaxSizeInBytes]; + while (written > 0) { + ssize_t result = read(dummy_pipe_fd_[0], + reinterpret_cast<void*>(buffer), + sizeof(buffer)); + // `read` blocks, and returns 0 only at EOF. We should not hit EOF until + // we've read everything that was written, so treat 0 as an error. + if (result > 0) { + VIXL_ASSERT(static_cast<size_t>(result) <= written); + written -= result; + } else { + // For -EINTR, just try again. We can't handle any other error. 
+ VIXL_CHECK(result == -EINTR); + } } -} + return can_read; +} void Simulator::PrintExclusiveAccessWarning() { if (print_exclusive_access_warning_) { @@ -1971,7 +2592,6 @@ void Simulator::PrintExclusiveAccessWarning() { } } - void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { LoadStoreExclusive op = static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask)); @@ -2045,30 +2665,35 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). // We will print a more detailed log. + unsigned reg_size = 0; switch (op) { case LDXRB_w: case LDAXRB_w: case LDARB_w: case LDLARB: WriteWRegister(rt, Memory::Read<uint8_t>(address), NoRegLog); + reg_size = kWRegSizeInBytes; break; case LDXRH_w: case LDAXRH_w: case LDARH_w: case LDLARH: WriteWRegister(rt, Memory::Read<uint16_t>(address), NoRegLog); + reg_size = kWRegSizeInBytes; break; case LDXR_w: case LDAXR_w: case LDAR_w: case LDLAR_w: WriteWRegister(rt, Memory::Read<uint32_t>(address), NoRegLog); + reg_size = kWRegSizeInBytes; break; case LDXR_x: case LDAXR_x: case LDAR_x: case LDLAR_x: WriteXRegister(rt, Memory::Read<uint64_t>(address), NoRegLog); + reg_size = kXRegSizeInBytes; break; case LDXP_w: case LDAXP_w: @@ -2076,6 +2701,7 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { WriteWRegister(rt2, Memory::Read<uint32_t>(address + element_size), NoRegLog); + reg_size = kWRegSizeInBytes; break; case LDXP_x: case LDAXP_x: @@ -2083,6 +2709,7 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { WriteXRegister(rt2, Memory::Read<uint64_t>(address + element_size), NoRegLog); + reg_size = kXRegSizeInBytes; break; default: VIXL_UNREACHABLE(); @@ -2093,11 +2720,10 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { __sync_synchronize(); } - LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); + PrintRegisterFormat format = GetPrintRegisterFormatForSize(reg_size); + LogExtendingRead(rt, format, element_size, address); if (is_pair) { - LogRead(address + element_size, - rt2, - GetPrintRegisterFormatForSize(element_size)); + LogExtendingRead(rt2, format, element_size, address + element_size); } } else { if (is_acquire_release) { @@ -2161,11 +2787,11 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { VIXL_UNREACHABLE(); } - LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); + PrintRegisterFormat format = + GetPrintRegisterFormatForSize(element_size); + LogWrite(rt, format, address); if (is_pair) { - LogWrite(address + element_size, - rt2, - GetPrintRegisterFormatForSize(element_size)); + LogWrite(rt2, format, address + element_size); } } } @@ -2232,8 +2858,9 @@ void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) { Memory::Write<T>(address, result); WriteRegister<T>(rt, data, NoRegLog); - LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); - LogWrite(address, rs, GetPrintRegisterFormatForSize(element_size)); + PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size); + LogRead(rt, format, address); + LogWrite(rs, format, address); } template <typename T> @@ -2264,8 +2891,9 @@ void Simulator::AtomicMemorySwapHelper(const Instruction* instr) { WriteRegister<T>(rt, data); - LogRead(address, rt, GetPrintRegisterFormat(element_size)); - LogWrite(address, rs, GetPrintRegisterFormat(element_size)); + PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size); + LogRead(rt, format, address); + 
LogWrite(rs, format, address); } template <typename T> @@ -2283,7 +2911,7 @@ void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) { // Approximate load-acquire by issuing a full barrier after the load. __sync_synchronize(); - LogRead(address, rt, GetPrintRegisterFormat(element_size)); + LogRead(rt, GetPrintRegisterFormatForSize(element_size), address); } #define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \ @@ -2400,27 +3028,27 @@ void Simulator::VisitLoadLiteral(const Instruction* instr) { // print a more detailed log. case LDR_w_lit: WriteWRegister(rt, Memory::Read<uint32_t>(address), NoRegLog); - LogRead(address, rt, kPrintWReg); + LogRead(rt, kPrintWReg, address); break; case LDR_x_lit: WriteXRegister(rt, Memory::Read<uint64_t>(address), NoRegLog); - LogRead(address, rt, kPrintXReg); + LogRead(rt, kPrintXReg, address); break; case LDR_s_lit: WriteSRegister(rt, Memory::Read<float>(address), NoRegLog); - LogVRead(address, rt, kPrintSReg); + LogVRead(rt, kPrintSRegFP, address); break; case LDR_d_lit: WriteDRegister(rt, Memory::Read<double>(address), NoRegLog); - LogVRead(address, rt, kPrintDReg); + LogVRead(rt, kPrintDRegFP, address); break; case LDR_q_lit: WriteQRegister(rt, Memory::Read<qreg_t>(address), NoRegLog); - LogVRead(address, rt, kPrintReg1Q); + LogVRead(rt, kPrintReg1Q, address); break; case LDRSW_x_lit: WriteXRegister(rt, Memory::Read<int32_t>(address), NoRegLog); - LogRead(address, rt, kPrintWReg); + LogExtendingRead(rt, kPrintXReg, kWRegSizeInBytes, address); break; // Ignore prfm hint instructions. @@ -2795,40 +3423,6 @@ void Simulator::VisitDataProcessing2Source(const Instruction* instr) { } -// The algorithm used is adapted from the one described in section 8.2 of -// Hacker's Delight, by Henry S. Warren, Jr. -template <typename T> -static int64_t MultiplyHigh(T u, T v) { - uint64_t u0, v0, w0, u1, v1, w1, w2, t; - uint64_t sign_mask = UINT64_C(0x8000000000000000); - uint64_t sign_ext = 0; - if (std::numeric_limits<T>::is_signed) { - sign_ext = UINT64_C(0xffffffff00000000); - } - - VIXL_ASSERT(sizeof(u) == sizeof(uint64_t)); - VIXL_ASSERT(sizeof(u) == sizeof(u0)); - - u0 = u & 0xffffffff; - u1 = u >> 32 | (((u & sign_mask) != 0) ? sign_ext : 0); - v0 = v & 0xffffffff; - v1 = v >> 32 | (((v & sign_mask) != 0) ? sign_ext : 0); - - w0 = u0 * v0; - t = u1 * v0 + (w0 >> 32); - - w1 = t & 0xffffffff; - w2 = t >> 32 | (((t & sign_mask) != 0) ? sign_ext : 0); - w1 = u0 * v1 + w1; - w1 = w1 >> 32 | (((w1 & sign_mask) != 0) ? sign_ext : 0); - - uint64_t value = u1 * v1 + w2 + w1; - int64_t result; - memcpy(&result, &value, sizeof(result)); - return result; -} - - void Simulator::VisitDataProcessing3Source(const Instruction* instr) { unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize; @@ -2864,12 +3458,13 @@ void Simulator::VisitDataProcessing3Source(const Instruction* instr) { result = ReadXRegister(instr->GetRa()) - (rn_u32 * rm_u32); break; case UMULH_x: - result = MultiplyHigh(ReadRegister<uint64_t>(instr->GetRn()), - ReadRegister<uint64_t>(instr->GetRm())); + result = + internal::MultiplyHigh<64>(ReadRegister<uint64_t>(instr->GetRn()), + ReadRegister<uint64_t>(instr->GetRm())); break; case SMULH_x: - result = MultiplyHigh(ReadXRegister(instr->GetRn()), - ReadXRegister(instr->GetRm())); + result = internal::MultiplyHigh<64>(ReadXRegister(instr->GetRn()), + ReadXRegister(instr->GetRm())); break; default: VIXL_UNIMPLEMENTED(); @@ -2936,9 +3531,10 @@ void Simulator::VisitExtract(const Instruction* instr) { unsigned reg_size = (instr->GetSixtyFourBits() == 1) ? 
kXRegSize : kWRegSize; uint64_t low_res = static_cast<uint64_t>(ReadRegister(reg_size, instr->GetRm())) >> lsb; - uint64_t high_res = - (lsb == 0) ? 0 : ReadRegister<uint64_t>(reg_size, instr->GetRn()) - << (reg_size - lsb); + uint64_t high_res = (lsb == 0) + ? 0 + : ReadRegister<uint64_t>(reg_size, instr->GetRn()) + << (reg_size - lsb); WriteRegister(reg_size, instr->GetRd(), low_res | high_res); } @@ -3948,8 +4544,8 @@ void Simulator::VisitSystem(const Instruction* instr) { break; case RNDR: case RNDRRS: { - uint64_t high = jrand48(rndr_state_); - uint64_t low = jrand48(rndr_state_); + uint64_t high = jrand48(rand_state_); + uint64_t low = jrand48(rand_state_); uint64_t rand_num = (high << 32) | (low & 0xffffffff); WriteXRegister(instr->GetRt(), rand_num); // Simulate successful random number generation. @@ -4530,10 +5126,10 @@ void Simulator::VisitNEON3Same(const Instruction* instr) { fminnm(vf, rd, rn, rm); break; case NEON_FMLA: - fmla(vf, rd, rn, rm); + fmla(vf, rd, rd, rn, rm); break; case NEON_FMLS: - fmls(vf, rd, rn, rm); + fmls(vf, rd, rd, rn, rm); break; case NEON_FMULX: fmulx(vf, rd, rn, rm); @@ -4624,10 +5220,10 @@ void Simulator::VisitNEON3Same(const Instruction* instr) { cmptst(vf, rd, rn, rm); break; case NEON_MLS: - mls(vf, rd, rn, rm); + mls(vf, rd, rd, rn, rm); break; case NEON_MLA: - mla(vf, rd, rn, rm); + mla(vf, rd, rd, rn, rm); break; case NEON_MUL: mul(vf, rd, rn, rm); @@ -4754,13 +5350,11 @@ void Simulator::VisitNEON3SameFP16(const Instruction* instr) { B(vf, rd, rn, rm); \ break; SIM_FUNC(FMAXNM, fmaxnm); - SIM_FUNC(FMLA, fmla); SIM_FUNC(FADD, fadd); SIM_FUNC(FMULX, fmulx); SIM_FUNC(FMAX, fmax); SIM_FUNC(FRECPS, frecps); SIM_FUNC(FMINNM, fminnm); - SIM_FUNC(FMLS, fmls); SIM_FUNC(FSUB, fsub); SIM_FUNC(FMIN, fmin); SIM_FUNC(FRSQRTS, frsqrts); @@ -4773,6 +5367,12 @@ void Simulator::VisitNEON3SameFP16(const Instruction* instr) { SIM_FUNC(FABD, fabd); SIM_FUNC(FMINP, fminp); #undef SIM_FUNC + case NEON_FMLA_H: + fmla(vf, rd, rd, rn, rm); + break; + case NEON_FMLS_H: + fmls(vf, rd, rd, rn, rm); + break; case NEON_FCMEQ_H: fcmp(vf, rd, rn, rm, eq); break; @@ -4803,7 +5403,7 @@ void Simulator::VisitNEON3SameExtra(const Instruction* instr) { VectorFormat vf = nfd.GetVectorFormat(); if (instr->Mask(NEON3SameExtraFCMLAMask) == NEON_FCMLA) { rot = instr->GetImmRotFcmlaVec(); - fcmla(vf, rd, rn, rm, rot); + fcmla(vf, rd, rn, rm, rd, rot); } else if (instr->Mask(NEON3SameExtraFCADDMask) == NEON_FCADD) { rot = instr->GetImmRotFcadd(); fcadd(vf, rd, rn, rm, rot); @@ -5347,7 +5947,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, reg[i] = (instr->GetRt() + i) % kNumberOfVRegisters; addr[i] = addr_base + (i * reg_size); } - int count = 1; + int struct_parts = 1; + int reg_count = 1; bool log_read = true; // Bit 23 determines whether this is an offset or post-index addressing mode. 
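A note on the fmla/fmls, mla/mls and fcmla changes above: each call now passes the destination register twice, once as the output and once as an explicit accumulator input. The following minimal sketch (plain scalar C++ with hypothetical helper names, not VIXL's SimVRegister API) shows the two shapes; the extra operand is what lets the SVE handlers later in this patch accumulate into a register other than the destination, as in fmla(vform, result, zd, zn, zm).

  #include <cstddef>

  // Old, destructive shape: the accumulator is implicitly the destination.
  //   d[i] = d[i] + (n[i] * m[i])
  void mla_destructive(float* d, const float* n, const float* m,
                       size_t lanes) {
    for (size_t i = 0; i < lanes; i++) d[i] += n[i] * m[i];
  }

  // New, non-destructive shape: the accumulator `a` is an explicit input.
  //   d[i] = a[i] + (n[i] * m[i])
  // Passing a == d reproduces the old behaviour, which is exactly what the
  // rewritten calls of the form fmla(vf, rd, rd, rn, rm) do.
  void mla(float* d, const float* a, const float* n, const float* m,
           size_t lanes) {
    for (size_t i = 0; i < lanes; i++) d[i] = a[i] + (n[i] * m[i]);
  }
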
@@ -5363,17 +5964,17 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, case NEON_LD1_4v: case NEON_LD1_4v_post: ld1(vf, ReadVRegister(reg[3]), addr[3]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_LD1_3v: case NEON_LD1_3v_post: ld1(vf, ReadVRegister(reg[2]), addr[2]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_LD1_2v: case NEON_LD1_2v_post: ld1(vf, ReadVRegister(reg[1]), addr[1]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_LD1_1v: case NEON_LD1_1v_post: @@ -5382,17 +5983,17 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, case NEON_ST1_4v: case NEON_ST1_4v_post: st1(vf, ReadVRegister(reg[3]), addr[3]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_ST1_3v: case NEON_ST1_3v_post: st1(vf, ReadVRegister(reg[2]), addr[2]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_ST1_2v: case NEON_ST1_2v_post: st1(vf, ReadVRegister(reg[1]), addr[1]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_ST1_1v: case NEON_ST1_1v_post: @@ -5402,12 +6003,14 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, case NEON_LD2_post: case NEON_LD2: ld2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]); - count = 2; + struct_parts = 2; + reg_count = 2; break; case NEON_ST2: case NEON_ST2_post: st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]); - count = 2; + struct_parts = 2; + reg_count = 2; log_read = false; break; case NEON_LD3_post: @@ -5417,7 +6020,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[1]), ReadVRegister(reg[2]), addr[0]); - count = 3; + struct_parts = 3; + reg_count = 3; break; case NEON_ST3: case NEON_ST3_post: @@ -5426,7 +6030,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[1]), ReadVRegister(reg[2]), addr[0]); - count = 3; + struct_parts = 3; + reg_count = 3; log_read = false; break; case NEON_ST4: @@ -5437,7 +6042,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[2]), ReadVRegister(reg[3]), addr[0]); - count = 4; + struct_parts = 4; + reg_count = 4; log_read = false; break; case NEON_LD4_post: @@ -5448,22 +6054,31 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[2]), ReadVRegister(reg[3]), addr[0]); - count = 4; + struct_parts = 4; + reg_count = 4; break; default: VIXL_UNIMPLEMENTED(); } - // Explicitly log the register update whilst we have type information. - for (int i = 0; i < count; i++) { - // For de-interleaving loads, only print the base address. - int lane_size = LaneSizeInBytesFromFormat(vf); - PrintRegisterFormat format = GetPrintRegisterFormatTryFP( - GetPrintRegisterFormatForSize(reg_size, lane_size)); + bool do_trace = log_read ? ShouldTraceVRegs() : ShouldTraceWrites(); + if (do_trace) { + PrintRegisterFormat print_format = + GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf)); + const char* op; if (log_read) { - LogVRead(addr_base, reg[i], format); + op = "<-"; } else { - LogVWrite(addr_base, reg[i], format); + op = "->"; + // Stores don't represent a change to the source register's value, so only + // print the relevant part of the value. 
+ print_format = GetPrintRegPartial(print_format); + } + + VIXL_ASSERT((struct_parts == reg_count) || (struct_parts == 1)); + for (int s = reg_count - struct_parts; s >= 0; s -= struct_parts) { + uintptr_t address = addr_base + (s * RegisterSizeInBytesFromFormat(vf)); + PrintVStructAccess(reg[s], struct_parts, print_format, op, address); } } @@ -5471,7 +6086,7 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, int rm = instr->GetRm(); // The immediate post index addressing mode is indicated by rm = 31. // The immediate is implied by the number of vector registers used. - addr_base += (rm == 31) ? RegisterSizeInBytesFromFormat(vf) * count + addr_base += (rm == 31) ? (RegisterSizeInBytesFromFormat(vf) * reg_count) : ReadXRegister(rm); WriteXRegister(instr->GetRn(), addr_base); } else { @@ -5507,6 +6122,8 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, // and PostIndex addressing. bool do_load = false; + bool replicating = false; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap()); VectorFormat vf_t = nfd.GetVectorFormat(); @@ -5581,99 +6198,67 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, } case NEON_LD1R: - case NEON_LD1R_post: { - vf = vf_t; - ld1r(vf, ReadVRegister(rt), addr); - do_load = true; - break; - } - + case NEON_LD1R_post: case NEON_LD2R: - case NEON_LD2R_post: { - vf = vf_t; - int rt2 = (rt + 1) % kNumberOfVRegisters; - ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr); - do_load = true; - break; - } - + case NEON_LD2R_post: case NEON_LD3R: - case NEON_LD3R_post: { - vf = vf_t; - int rt2 = (rt + 1) % kNumberOfVRegisters; - int rt3 = (rt2 + 1) % kNumberOfVRegisters; - ld3r(vf, ReadVRegister(rt), ReadVRegister(rt2), ReadVRegister(rt3), addr); - do_load = true; - break; - } - + case NEON_LD3R_post: case NEON_LD4R: - case NEON_LD4R_post: { + case NEON_LD4R_post: vf = vf_t; - int rt2 = (rt + 1) % kNumberOfVRegisters; - int rt3 = (rt2 + 1) % kNumberOfVRegisters; - int rt4 = (rt3 + 1) % kNumberOfVRegisters; - ld4r(vf, - ReadVRegister(rt), - ReadVRegister(rt2), - ReadVRegister(rt3), - ReadVRegister(rt4), - addr); do_load = true; + replicating = true; break; - } + default: VIXL_UNIMPLEMENTED(); } - PrintRegisterFormat print_format = - GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf)); - // Make sure that the print_format only includes a single lane. 
- print_format = - static_cast<PrintRegisterFormat>(print_format & ~kPrintRegAsVectorMask); - - int esize = LaneSizeInBytesFromFormat(vf); int index_shift = LaneSizeInBytesLog2FromFormat(vf); int lane = instr->GetNEONLSIndex(index_shift); - int scale = 0; + int reg_count = 0; int rt2 = (rt + 1) % kNumberOfVRegisters; int rt3 = (rt2 + 1) % kNumberOfVRegisters; int rt4 = (rt3 + 1) % kNumberOfVRegisters; switch (instr->Mask(NEONLoadStoreSingleLenMask)) { case NEONLoadStoreSingle1: - scale = 1; - if (do_load) { + reg_count = 1; + if (replicating) { + VIXL_ASSERT(do_load); + ld1r(vf, ReadVRegister(rt), addr); + } else if (do_load) { ld1(vf, ReadVRegister(rt), lane, addr); - LogVRead(addr, rt, print_format, lane); } else { st1(vf, ReadVRegister(rt), lane, addr); - LogVWrite(addr, rt, print_format, lane); } break; case NEONLoadStoreSingle2: - scale = 2; - if (do_load) { + reg_count = 2; + if (replicating) { + VIXL_ASSERT(do_load); + ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr); + } else if (do_load) { ld2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr); - LogVRead(addr, rt, print_format, lane); - LogVRead(addr + esize, rt2, print_format, lane); } else { st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr); - LogVWrite(addr, rt, print_format, lane); - LogVWrite(addr + esize, rt2, print_format, lane); } break; case NEONLoadStoreSingle3: - scale = 3; - if (do_load) { + reg_count = 3; + if (replicating) { + VIXL_ASSERT(do_load); + ld3r(vf, + ReadVRegister(rt), + ReadVRegister(rt2), + ReadVRegister(rt3), + addr); + } else if (do_load) { ld3(vf, ReadVRegister(rt), ReadVRegister(rt2), ReadVRegister(rt3), lane, addr); - LogVRead(addr, rt, print_format, lane); - LogVRead(addr + esize, rt2, print_format, lane); - LogVRead(addr + (2 * esize), rt3, print_format, lane); } else { st3(vf, ReadVRegister(rt), @@ -5681,14 +6266,19 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, ReadVRegister(rt3), lane, addr); - LogVWrite(addr, rt, print_format, lane); - LogVWrite(addr + esize, rt2, print_format, lane); - LogVWrite(addr + (2 * esize), rt3, print_format, lane); } break; case NEONLoadStoreSingle4: - scale = 4; - if (do_load) { + reg_count = 4; + if (replicating) { + VIXL_ASSERT(do_load); + ld4r(vf, + ReadVRegister(rt), + ReadVRegister(rt2), + ReadVRegister(rt3), + ReadVRegister(rt4), + addr); + } else if (do_load) { ld4(vf, ReadVRegister(rt), ReadVRegister(rt2), @@ -5696,10 +6286,6 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, ReadVRegister(rt4), lane, addr); - LogVRead(addr, rt, print_format, lane); - LogVRead(addr + esize, rt2, print_format, lane); - LogVRead(addr + (2 * esize), rt3, print_format, lane); - LogVRead(addr + (3 * esize), rt4, print_format, lane); } else { st4(vf, ReadVRegister(rt), @@ -5708,22 +6294,38 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, ReadVRegister(rt4), lane, addr); - LogVWrite(addr, rt, print_format, lane); - LogVWrite(addr + esize, rt2, print_format, lane); - LogVWrite(addr + (2 * esize), rt3, print_format, lane); - LogVWrite(addr + (3 * esize), rt4, print_format, lane); } break; default: VIXL_UNIMPLEMENTED(); } + // Trace registers and/or memory writes. 
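+  // Loads are traced as register updates, since they change the destination
+  // registers; stores do not, so they are traced as memory writes instead.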
+ PrintRegisterFormat print_format = + GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf)); + if (do_load) { + if (ShouldTraceVRegs()) { + if (replicating) { + PrintVReplicatingStructAccess(rt, reg_count, print_format, "<-", addr); + } else { + PrintVSingleStructAccess(rt, reg_count, lane, print_format, "<-", addr); + } + } + } else { + if (ShouldTraceWrites()) { + // Stores don't represent a change to the source register's value, so only + // print the relevant part of the value. + print_format = GetPrintRegPartial(print_format); + PrintVSingleStructAccess(rt, reg_count, lane, print_format, "->", addr); + } + } + if (addr_mode == PostIndex) { int rm = instr->GetRm(); int lane_size = LaneSizeInBytesFromFormat(vf); WriteXRegister(instr->GetRn(), - addr + - ((rm == 31) ? (scale * lane_size) : ReadXRegister(rm))); + addr + ((rm == 31) ? (reg_count * lane_size) + : ReadXRegister(rm))); } } @@ -6421,10 +7023,10 @@ void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) { NEONFormatDecoder nfd(instr, &map); VectorFormat vf = nfd.GetVectorFormat(); - int highestSetBit = HighestSetBitPosition(instr->GetImmNEONImmh()); - int immhimmb = instr->GetImmNEONImmhImmb(); - int right_shift = (16 << highestSetBit) - immhimmb; - int left_shift = immhimmb - (8 << highestSetBit); + int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh()); + int immh_immb = instr->GetImmNEONImmhImmb(); + int right_shift = (16 << highest_set_bit) - immh_immb; + int left_shift = immh_immb - (8 << highest_set_bit); switch (instr->Mask(NEONScalarShiftImmediateMask)) { case NEON_SHL_scalar: shl(vf, rd, rn, left_shift); @@ -6529,10 +7131,10 @@ void Simulator::VisitNEONShiftImmediate(const Instruction* instr) { {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}}; VectorFormat vf_l = nfd.GetVectorFormat(&map_l); - int highestSetBit = HighestSetBitPosition(instr->GetImmNEONImmh()); - int immhimmb = instr->GetImmNEONImmhImmb(); - int right_shift = (16 << highestSetBit) - immhimmb; - int left_shift = immhimmb - (8 << highestSetBit); + int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh()); + int immh_immb = instr->GetImmNEONImmhImmb(); + int right_shift = (16 << highest_set_bit) - immh_immb; + int left_shift = immh_immb - (8 << highest_set_bit); switch (instr->Mask(NEONShiftImmediateMask)) { case NEON_SHL: @@ -6741,6 +7343,4356 @@ void Simulator::VisitNEONPerm(const Instruction* instr) { } } +void Simulator::VisitSVEAddressGeneration(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister temp; + + VectorFormat vform = kFormatVnD; + mov(vform, temp, zm); + + switch (instr->Mask(SVEAddressGenerationMask)) { + case ADR_z_az_d_s32_scaled: + sxt(vform, temp, temp, kSRegSize); + break; + case ADR_z_az_d_u32_scaled: + uxt(vform, temp, temp, kSRegSize); + break; + case ADR_z_az_s_same_scaled: + vform = kFormatVnS; + break; + case ADR_z_az_d_same_scaled: + // Nothing to do. 
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+
+  int shift_amount = instr->ExtractBits(11, 10);
+  shl(vform, temp, temp, shift_amount);
+  add(vform, zd, zn, temp);
+}
+
+void Simulator::VisitSVEBitwiseLogicalWithImm_Unpredicated(
+    const Instruction* instr) {
+  Instr op = instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask);
+  switch (op) {
+    case AND_z_zi:
+    case EOR_z_zi:
+    case ORR_z_zi: {
+      int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
+      uint64_t imm = instr->GetSVEImmLogical();
+      // A valid immediate must have at least one bit set.
+      VIXL_ASSERT(imm != 0);
+      SVEBitwiseImmHelper(static_cast<SVEBitwiseLogicalWithImm_UnpredicatedOp>(
+                              op),
+                          SVEFormatFromLaneSizeInBytesLog2(lane_size),
+                          ReadVRegister(instr->GetRd()),
+                          imm);
+      break;
+    }
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEBroadcastBitmaskImm(const Instruction* instr) {
+  switch (instr->Mask(SVEBroadcastBitmaskImmMask)) {
+    case DUPM_z_i: {
+      /* DUPM uses the same lane size and immediate encoding as bitwise logical
+       * immediate instructions. */
+      int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
+      uint64_t imm = instr->GetSVEImmLogical();
+      VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+      dup_immediate(vform, ReadVRegister(instr->GetRd()), imm);
+      break;
+    }
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  Instr op = instr->Mask(SVEBitwiseLogicalUnpredicatedMask);
+
+  LogicalOp logical_op;
+  switch (op) {
+    case AND_z_zz:
+      logical_op = AND;
+      break;
+    case BIC_z_zz:
+      logical_op = BIC;
+      break;
+    case EOR_z_zz:
+      logical_op = EOR;
+      break;
+    case ORR_z_zz:
+      logical_op = ORR;
+      break;
+    default:
+      logical_op = LogicalOpMask;
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+  // Lane size of registers is irrelevant to the bitwise operations, so perform
+  // the operation on D-sized lanes.
+  SVEBitwiseLogicalUnpredicatedHelper(logical_op, kFormatVnD, zd, zn, zm);
+}
+
+void Simulator::VisitSVEBitwiseShiftByImm_Predicated(const Instruction* instr) {
+  SimVRegister& zdn = ReadVRegister(instr->GetRd());
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+  SimVRegister scratch;
+  SimVRegister result;
+
+  bool for_division = false;
+  Shift shift_op = NO_SHIFT;
+  switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
+    case ASRD_z_p_zi:
+      shift_op = ASR;
+      for_division = true;
+      break;
+    case ASR_z_p_zi:
+      shift_op = ASR;
+      break;
+    case LSL_z_p_zi:
+      shift_op = LSL;
+      break;
+    case LSR_z_p_zi:
+      shift_op = LSR;
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+
+  std::pair<int, int> shift_and_lane_size =
+      instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
+  unsigned lane_size = shift_and_lane_size.second;
+  VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+  int shift_dist = shift_and_lane_size.first;
+
+  if ((shift_op == ASR) && for_division) {
+    asrd(vform, result, zdn, shift_dist);
+  } else {
+    if (shift_op == LSL) {
+      // Shift distance is computed differently for LSL. Convert the result.
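+      // For LSL the encoded distance is (lane size in bits) - shift, so the
+      // conversion below recovers the LSL amount; e.g. for H lanes
+      // (8 << 1 == 16), an encoded distance of 13 denotes LSL #3.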
+ shift_dist = (8 << lane_size) - shift_dist; + } + dup_immediate(vform, scratch, shift_dist); + SVEBitwiseShiftHelper(shift_op, vform, result, zdn, scratch, false); + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEBitwiseShiftByVector_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + SimVRegister result; + SimVRegister shiftand; // Vector to be shifted. + SimVRegister shiftor; // Vector shift amount. + + Shift shift_op = ASR; + mov(vform, shiftand, zdn); + mov(vform, shiftor, zm); + + switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) { + case ASRR_z_p_zz: + mov(vform, shiftand, zm); + mov(vform, shiftor, zdn); + VIXL_FALLTHROUGH(); + case ASR_z_p_zz: + break; + case LSLR_z_p_zz: + mov(vform, shiftand, zm); + mov(vform, shiftor, zdn); + VIXL_FALLTHROUGH(); + case LSL_z_p_zz: + shift_op = LSL; + break; + case LSRR_z_p_zz: + mov(vform, shiftand, zm); + mov(vform, shiftor, zdn); + VIXL_FALLTHROUGH(); + case LSR_z_p_zz: + shift_op = LSR; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + SVEBitwiseShiftHelper(shift_op, + vform, + result, + shiftand, + shiftor, + /* is_wide_elements = */ false); + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEBitwiseShiftByWideElements_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + SimVRegister result; + Shift shift_op = ASR; + + switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) { + case ASR_z_p_zw: + break; + case LSL_z_p_zw: + shift_op = LSL; + break; + case LSR_z_p_zw: + shift_op = LSR; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + SVEBitwiseShiftHelper(shift_op, + vform, + result, + zdn, + zm, + /* is_wide_elements = */ true); + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + Shift shift_op; + switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) { + case ASR_z_zi: + case ASR_z_zw: + shift_op = ASR; + break; + case LSL_z_zi: + case LSL_z_zw: + shift_op = LSL; + break; + case LSR_z_zi: + case LSR_z_zw: + shift_op = LSR; + break; + default: + shift_op = NO_SHIFT; + VIXL_UNIMPLEMENTED(); + break; + } + + switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) { + case ASR_z_zi: + case LSL_z_zi: + case LSR_z_zi: { + SimVRegister scratch; + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + unsigned lane_size = shift_and_lane_size.second; + VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size); + int shift_dist = shift_and_lane_size.first; + if (shift_op == LSL) { + // Shift distance is computed differently for LSL. Convert the result. 
+ shift_dist = (8 << lane_size) - shift_dist; + } + dup_immediate(vform, scratch, shift_dist); + SVEBitwiseShiftHelper(shift_op, vform, zd, zn, scratch, false); + break; + } + case ASR_z_zw: + case LSL_z_zw: + case LSR_z_zw: + SVEBitwiseShiftHelper(shift_op, + instr->GetSVEVectorFormat(), + zd, + zn, + ReadVRegister(instr->GetRm()), + true); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIncDecRegisterByElementCount(const Instruction* instr) { + // Although the instructions have a separate encoding class, the lane size is + // encoded in the same way as most other SVE instructions. + VectorFormat vform = instr->GetSVEVectorFormat(); + + int pattern = instr->GetImmSVEPredicateConstraint(); + int count = GetPredicateConstraintLaneCount(vform, pattern); + int multiplier = instr->ExtractBits(19, 16) + 1; + + switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) { + case DECB_r_rs: + case DECD_r_rs: + case DECH_r_rs: + case DECW_r_rs: + count = -count; + break; + case INCB_r_rs: + case INCD_r_rs: + case INCH_r_rs: + case INCW_r_rs: + // Nothing to do. + break; + default: + VIXL_UNIMPLEMENTED(); + return; + } + + WriteXRegister(instr->GetRd(), + IncDecN(ReadXRegister(instr->GetRd()), + count * multiplier, + kXRegSize)); +} + +void Simulator::VisitSVEIncDecVectorByElementCount(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { + VIXL_UNIMPLEMENTED(); + } + + int pattern = instr->GetImmSVEPredicateConstraint(); + int count = GetPredicateConstraintLaneCount(vform, pattern); + int multiplier = instr->ExtractBits(19, 16) + 1; + + switch (instr->Mask(SVEIncDecVectorByElementCountMask)) { + case DECD_z_zs: + case DECH_z_zs: + case DECW_z_zs: + count = -count; + break; + case INCD_z_zs: + case INCH_z_zs: + case INCW_z_zs: + // Nothing to do. + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister scratch; + dup_immediate(vform, + scratch, + IncDecN(0, + count * multiplier, + LaneSizeInBitsFromFormat(vform))); + add(vform, zd, zd, scratch); +} + +void Simulator::VisitSVESaturatingIncDecRegisterByElementCount( + const Instruction* instr) { + // Although the instructions have a separate encoding class, the lane size is + // encoded in the same way as most other SVE instructions. 
+ VectorFormat vform = instr->GetSVEVectorFormat(); + + int pattern = instr->GetImmSVEPredicateConstraint(); + int count = GetPredicateConstraintLaneCount(vform, pattern); + int multiplier = instr->ExtractBits(19, 16) + 1; + + unsigned width = kXRegSize; + bool is_signed = false; + + switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) { + case SQDECB_r_rs_sx: + case SQDECD_r_rs_sx: + case SQDECH_r_rs_sx: + case SQDECW_r_rs_sx: + width = kWRegSize; + VIXL_FALLTHROUGH(); + case SQDECB_r_rs_x: + case SQDECD_r_rs_x: + case SQDECH_r_rs_x: + case SQDECW_r_rs_x: + is_signed = true; + count = -count; + break; + case SQINCB_r_rs_sx: + case SQINCD_r_rs_sx: + case SQINCH_r_rs_sx: + case SQINCW_r_rs_sx: + width = kWRegSize; + VIXL_FALLTHROUGH(); + case SQINCB_r_rs_x: + case SQINCD_r_rs_x: + case SQINCH_r_rs_x: + case SQINCW_r_rs_x: + is_signed = true; + break; + case UQDECB_r_rs_uw: + case UQDECD_r_rs_uw: + case UQDECH_r_rs_uw: + case UQDECW_r_rs_uw: + width = kWRegSize; + VIXL_FALLTHROUGH(); + case UQDECB_r_rs_x: + case UQDECD_r_rs_x: + case UQDECH_r_rs_x: + case UQDECW_r_rs_x: + count = -count; + break; + case UQINCB_r_rs_uw: + case UQINCD_r_rs_uw: + case UQINCH_r_rs_uw: + case UQINCW_r_rs_uw: + width = kWRegSize; + VIXL_FALLTHROUGH(); + case UQINCB_r_rs_x: + case UQINCD_r_rs_x: + case UQINCH_r_rs_x: + case UQINCW_r_rs_x: + // Nothing to do. + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + WriteXRegister(instr->GetRd(), + IncDecN(ReadXRegister(instr->GetRd()), + count * multiplier, + width, + true, + is_signed)); +} + +void Simulator::VisitSVESaturatingIncDecVectorByElementCount( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { + VIXL_UNIMPLEMENTED(); + } + + int pattern = instr->GetImmSVEPredicateConstraint(); + int count = GetPredicateConstraintLaneCount(vform, pattern); + int multiplier = instr->ExtractBits(19, 16) + 1; + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister scratch; + dup_immediate(vform, + scratch, + IncDecN(0, + count * multiplier, + LaneSizeInBitsFromFormat(vform))); + + switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) { + case SQDECD_z_zs: + case SQDECH_z_zs: + case SQDECW_z_zs: + sub(vform, zd, zd, scratch).SignedSaturate(vform); + break; + case SQINCD_z_zs: + case SQINCH_z_zs: + case SQINCW_z_zs: + add(vform, zd, zd, scratch).SignedSaturate(vform); + break; + case UQDECD_z_zs: + case UQDECH_z_zs: + case UQDECW_z_zs: + sub(vform, zd, zd, scratch).UnsignedSaturate(vform); + break; + case UQINCD_z_zs: + case UQINCH_z_zs: + case UQINCW_z_zs: + add(vform, zd, zd, scratch).UnsignedSaturate(vform); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEElementCount(const Instruction* instr) { + switch (instr->Mask(SVEElementCountMask)) { + case CNTB_r_s: + case CNTD_r_s: + case CNTH_r_s: + case CNTW_r_s: + // All handled below. + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + // Although the instructions are separated, the lane size is encoded in the + // same way as most other SVE instructions. 
+ VectorFormat vform = instr->GetSVEVectorFormat(); + + int pattern = instr->GetImmSVEPredicateConstraint(); + int count = GetPredicateConstraintLaneCount(vform, pattern); + int multiplier = instr->ExtractBits(19, 16) + 1; + WriteXRegister(instr->GetRd(), count * multiplier); +} + +void Simulator::VisitSVEFPAccumulatingReduction(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& vdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + switch (instr->Mask(SVEFPAccumulatingReductionMask)) { + case FADDA_v_p_z: + fadda(vform, vdn, pg, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEFPArithmetic_Predicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + SimVRegister result; + + switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) { + case FABD_z_p_zz: + fabd(vform, result, zdn, zm); + break; + case FADD_z_p_zz: + fadd(vform, result, zdn, zm); + break; + case FDIVR_z_p_zz: + fdiv(vform, result, zm, zdn); + break; + case FDIV_z_p_zz: + fdiv(vform, result, zdn, zm); + break; + case FMAXNM_z_p_zz: + fmaxnm(vform, result, zdn, zm); + break; + case FMAX_z_p_zz: + fmax(vform, result, zdn, zm); + break; + case FMINNM_z_p_zz: + fminnm(vform, result, zdn, zm); + break; + case FMIN_z_p_zz: + fmin(vform, result, zdn, zm); + break; + case FMULX_z_p_zz: + fmulx(vform, result, zdn, zm); + break; + case FMUL_z_p_zz: + fmul(vform, result, zdn, zm); + break; + case FSCALE_z_p_zz: + fscale(vform, result, zdn, zm); + break; + case FSUBR_z_p_zz: + fsub(vform, result, zm, zdn); + break; + case FSUB_z_p_zz: + fsub(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEFPArithmeticWithImm_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { + VIXL_UNIMPLEMENTED(); + } + + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + int i1 = instr->ExtractBit(5); + SimVRegister add_sub_imm, min_max_imm, mul_imm; + uint64_t half = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 0.5); + uint64_t one = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 1.0); + uint64_t two = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 2.0); + dup_immediate(vform, add_sub_imm, i1 ? one : half); + dup_immediate(vform, min_max_imm, i1 ? one : 0); + dup_immediate(vform, mul_imm, i1 ? 
two : half); + + switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) { + case FADD_z_p_zs: + fadd(vform, result, zdn, add_sub_imm); + break; + case FMAXNM_z_p_zs: + fmaxnm(vform, result, zdn, min_max_imm); + break; + case FMAX_z_p_zs: + fmax(vform, result, zdn, min_max_imm); + break; + case FMINNM_z_p_zs: + fminnm(vform, result, zdn, min_max_imm); + break; + case FMIN_z_p_zs: + fmin(vform, result, zdn, min_max_imm); + break; + case FMUL_z_p_zs: + fmul(vform, result, zdn, mul_imm); + break; + case FSUBR_z_p_zs: + fsub(vform, result, add_sub_imm, zdn); + break; + case FSUB_z_p_zs: + fsub(vform, result, zdn, add_sub_imm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEFPTrigMulAddCoefficient(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + + switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) { + case FTMAD_z_zzi: + ftmad(vform, zd, zd, zm, instr->ExtractBits(18, 16)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEFPArithmeticUnpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) { + case FADD_z_zz: + fadd(vform, zd, zn, zm); + break; + case FMUL_z_zz: + fmul(vform, zd, zn, zm); + break; + case FRECPS_z_zz: + frecps(vform, zd, zn, zm); + break; + case FRSQRTS_z_zz: + frsqrts(vform, zd, zn, zm); + break; + case FSUB_z_zz: + fsub(vform, zd, zn, zm); + break; + case FTSMUL_z_zz: + ftsmul(vform, zd, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEFPCompareVectors(const Instruction* instr) { + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister result; + + switch (instr->Mask(SVEFPCompareVectorsMask)) { + case FACGE_p_p_zz: + fabscmp(vform, result, zn, zm, ge); + break; + case FACGT_p_p_zz: + fabscmp(vform, result, zn, zm, gt); + break; + case FCMEQ_p_p_zz: + fcmp(vform, result, zn, zm, eq); + break; + case FCMGE_p_p_zz: + fcmp(vform, result, zn, zm, ge); + break; + case FCMGT_p_p_zz: + fcmp(vform, result, zn, zm, gt); + break; + case FCMNE_p_p_zz: + fcmp(vform, result, zn, zm, ne); + break; + case FCMUO_p_p_zz: + fcmp(vform, result, zn, zm, uo); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + ExtractFromSimVRegister(vform, pd, result); + mov_zeroing(pd, pg, pd); +} + +void Simulator::VisitSVEFPCompareWithZero(const Instruction* instr) { + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister result; + + SimVRegister zeros; + dup_immediate(kFormatVnD, zeros, 0); + + switch (instr->Mask(SVEFPCompareWithZeroMask)) { + case FCMEQ_p_p_z0: + fcmp(vform, result, zn, zeros, eq); + break; + case FCMGE_p_p_z0: + fcmp(vform, result, zn, zeros, ge); + break; + case FCMGT_p_p_z0: + fcmp(vform, result, zn, zeros, gt); + break; + case FCMLE_p_p_z0: + 
fcmp(vform, result, zn, zeros, le); + break; + case FCMLT_p_p_z0: + fcmp(vform, result, zn, zeros, lt); + break; + case FCMNE_p_p_z0: + fcmp(vform, result, zn, zeros, ne); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + ExtractFromSimVRegister(vform, pd, result); + mov_zeroing(pd, pg, pd); +} + +void Simulator::VisitSVEFPComplexAddition(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + + if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { + VIXL_UNIMPLEMENTED(); + } + + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + int rot = instr->ExtractBit(16); + + SimVRegister result; + + switch (instr->Mask(SVEFPComplexAdditionMask)) { + case FCADD_z_p_zz: + fcadd(vform, result, zdn, zm, rot); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEFPComplexMulAdd(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + + if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { + VIXL_UNIMPLEMENTED(); + } + + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + int rot = instr->ExtractBits(14, 13); + + SimVRegister result; + + switch (instr->Mask(SVEFPComplexMulAddMask)) { + case FCMLA_z_p_zzz: + fcmla(vform, result, zn, zm, zda, rot); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zda, pg, result); +} + +void Simulator::VisitSVEFPComplexMulAddIndex(const Instruction* instr) { + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + int rot = instr->ExtractBits(11, 10); + unsigned zm_code = instr->GetRm(); + int index = -1; + VectorFormat vform, vform_dup; + + switch (instr->Mask(SVEFPComplexMulAddIndexMask)) { + case FCMLA_z_zzzi_h: + vform = kFormatVnH; + vform_dup = kFormatVnS; + index = zm_code >> 3; + zm_code &= 0x7; + break; + case FCMLA_z_zzzi_s: + vform = kFormatVnS; + vform_dup = kFormatVnD; + index = zm_code >> 4; + zm_code &= 0xf; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (index >= 0) { + SimVRegister temp; + dup_elements_to_segments(vform_dup, temp, ReadVRegister(zm_code), index); + fcmla(vform, zda, zn, temp, zda, rot); + } +} + +typedef LogicVRegister (Simulator::*FastReduceFn)(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + +void Simulator::VisitSVEFPFastReduction(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& vd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + int lane_size = LaneSizeInBitsFromFormat(vform); + + uint64_t inactive_value = 0; + FastReduceFn fn = nullptr; + + switch (instr->Mask(SVEFPFastReductionMask)) { + case FADDV_v_p_z: + fn = &Simulator::faddv; + break; + case FMAXNMV_v_p_z: + inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); + fn = &Simulator::fmaxnmv; + break; + case FMAXV_v_p_z: + inactive_value = FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity); + fn = &Simulator::fmaxv; + break; + case FMINNMV_v_p_z: + inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); + fn = &Simulator::fminnmv; + break; + case FMINV_v_p_z: + inactive_value = FPToRawbitsWithSize(lane_size, 
kFP64PositiveInfinity); + fn = &Simulator::fminv; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister scratch; + dup_immediate(vform, scratch, inactive_value); + mov_merging(vform, scratch, pg, zn); + if (fn != nullptr) (this->*fn)(vform, vd, scratch); +} + +void Simulator::VisitSVEFPMulIndex(const Instruction* instr) { + VectorFormat vform = kFormatUndefined; + unsigned zm_code = instr->GetRm() & 0xf; + unsigned index = instr->ExtractBits(20, 19); + + switch (instr->Mask(SVEFPMulIndexMask)) { + case FMUL_z_zzi_d: + vform = kFormatVnD; + index >>= 1; // Only bit 20 is the index for D lanes. + break; + case FMUL_z_zzi_h_i3h: + index += 4; // Bit 22 (i3h) is the top bit of index. + VIXL_FALLTHROUGH(); + case FMUL_z_zzi_h: + vform = kFormatVnH; + zm_code &= 7; // Three bits used for zm. + break; + case FMUL_z_zzi_s: + vform = kFormatVnS; + zm_code &= 7; // Three bits used for zm. + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister temp; + + dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index); + fmul(vform, zd, zn, temp); +} + +void Simulator::VisitSVEFPMulAdd(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + if (instr->ExtractBit(15) == 0) { + // Floating-point multiply-accumulate writing addend. + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + switch (instr->Mask(SVEFPMulAddMask)) { + // zda = zda + zn * zm + case FMLA_z_p_zzz: + fmla(vform, result, zd, zn, zm); + break; + // zda = -zda + -zn * zm + case FNMLA_z_p_zzz: + fneg(vform, result, zd); + fmls(vform, result, result, zn, zm); + break; + // zda = zda + -zn * zm + case FMLS_z_p_zzz: + fmls(vform, result, zd, zn, zm); + break; + // zda = -zda + zn * zm + case FNMLS_z_p_zzz: + fneg(vform, result, zd); + fmla(vform, result, result, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } else { + // Floating-point multiply-accumulate writing multiplicand. + SimVRegister& za = ReadVRegister(instr->GetRm()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + + switch (instr->Mask(SVEFPMulAddMask)) { + // zdn = za + zdn * zm + case FMAD_z_p_zzz: + fmla(vform, result, za, zd, zm); + break; + // zdn = -za + -zdn * zm + case FNMAD_z_p_zzz: + fneg(vform, result, za); + fmls(vform, result, result, zd, zm); + break; + // zdn = za + -zdn * zm + case FMSB_z_p_zzz: + fmls(vform, result, za, zd, zm); + break; + // zdn = -za + zdn * zm + case FNMSB_z_p_zzz: + fneg(vform, result, za); + fmla(vform, result, result, zd, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } + + mov_merging(vform, zd, pg, result); +} + +void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) { + VectorFormat vform = kFormatUndefined; + unsigned zm_code = 0xffffffff; + unsigned index = 0xffffffff; + + switch (instr->Mask(SVEFPMulAddIndexMask)) { + case FMLA_z_zzzi_d: + case FMLS_z_zzzi_d: + vform = kFormatVnD; + zm_code = instr->GetRmLow16(); + // Only bit 20 is the index for D lanes. + index = instr->ExtractBit(20); + break; + case FMLA_z_zzzi_s: + case FMLS_z_zzzi_s: + vform = kFormatVnS; + zm_code = instr->GetRm() & 0x7; // Three bits used for zm. 
+ index = instr->ExtractBits(20, 19); + break; + case FMLA_z_zzzi_h: + case FMLS_z_zzzi_h: + case FMLA_z_zzzi_h_i3h: + case FMLS_z_zzzi_h_i3h: + vform = kFormatVnH; + zm_code = instr->GetRm() & 0x7; // Three bits used for zm. + index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister temp; + + dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index); + if (instr->ExtractBit(10) == 1) { + fmls(vform, zd, zd, zn, temp); + } else { + fmla(vform, zd, zd, zn, temp); + } +} + +void Simulator::VisitSVEFPConvertToInt(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + int dst_data_size; + int src_data_size; + + switch (instr->Mask(SVEFPConvertToIntMask)) { + case FCVTZS_z_p_z_d2w: + case FCVTZU_z_p_z_d2w: + dst_data_size = kSRegSize; + src_data_size = kDRegSize; + break; + case FCVTZS_z_p_z_d2x: + case FCVTZU_z_p_z_d2x: + dst_data_size = kDRegSize; + src_data_size = kDRegSize; + break; + case FCVTZS_z_p_z_fp162h: + case FCVTZU_z_p_z_fp162h: + dst_data_size = kHRegSize; + src_data_size = kHRegSize; + break; + case FCVTZS_z_p_z_fp162w: + case FCVTZU_z_p_z_fp162w: + dst_data_size = kSRegSize; + src_data_size = kHRegSize; + break; + case FCVTZS_z_p_z_fp162x: + case FCVTZU_z_p_z_fp162x: + dst_data_size = kDRegSize; + src_data_size = kHRegSize; + break; + case FCVTZS_z_p_z_s2w: + case FCVTZU_z_p_z_s2w: + dst_data_size = kSRegSize; + src_data_size = kSRegSize; + break; + case FCVTZS_z_p_z_s2x: + case FCVTZU_z_p_z_s2x: + dst_data_size = kDRegSize; + src_data_size = kSRegSize; + break; + default: + VIXL_UNIMPLEMENTED(); + dst_data_size = 0; + src_data_size = 0; + break; + } + + VectorFormat vform = + SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size)); + + if (instr->ExtractBit(16) == 0) { + fcvts(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero); + } else { + fcvtu(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero); + } +} + +void Simulator::VisitSVEFPConvertPrecision(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + int dst_data_size; + int src_data_size; + + switch (instr->Mask(SVEFPConvertPrecisionMask)) { + case FCVT_z_p_z_d2h: + dst_data_size = kHRegSize; + src_data_size = kDRegSize; + break; + case FCVT_z_p_z_d2s: + dst_data_size = kSRegSize; + src_data_size = kDRegSize; + break; + case FCVT_z_p_z_h2d: + dst_data_size = kDRegSize; + src_data_size = kHRegSize; + break; + case FCVT_z_p_z_h2s: + dst_data_size = kSRegSize; + src_data_size = kHRegSize; + break; + case FCVT_z_p_z_s2d: + dst_data_size = kDRegSize; + src_data_size = kSRegSize; + break; + case FCVT_z_p_z_s2h: + dst_data_size = kHRegSize; + src_data_size = kSRegSize; + break; + default: + VIXL_UNIMPLEMENTED(); + dst_data_size = 0; + src_data_size = 0; + break; + } + VectorFormat vform = + SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size)); + + fcvt(vform, dst_data_size, src_data_size, zd, pg, zn); +} + +void Simulator::VisitSVEFPUnaryOp(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + 
VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister result; + + switch (instr->Mask(SVEFPUnaryOpMask)) { + case FRECPX_z_p_z: + frecpx(vform, result, zn); + break; + case FSQRT_z_p_z: + fsqrt(vform, result, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zd, pg, result); +} + +void Simulator::VisitSVEFPRoundToIntegralValue(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = instr->GetSVEVectorFormat(); + FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode()); + bool exact_exception = false; + + switch (instr->Mask(SVEFPRoundToIntegralValueMask)) { + case FRINTA_z_p_z: + fpcr_rounding = FPTieAway; + break; + case FRINTI_z_p_z: + break; // Use FPCR rounding mode. + case FRINTM_z_p_z: + fpcr_rounding = FPNegativeInfinity; + break; + case FRINTN_z_p_z: + fpcr_rounding = FPTieEven; + break; + case FRINTP_z_p_z: + fpcr_rounding = FPPositiveInfinity; + break; + case FRINTX_z_p_z: + exact_exception = true; + break; + case FRINTZ_z_p_z: + fpcr_rounding = FPZero; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister result; + frint(vform, result, zn, fpcr_rounding, exact_exception, kFrintToInteger); + mov_merging(vform, zd, pg, result); +} + +void Simulator::VisitSVEIntConvertToFP(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode()); + int dst_data_size; + int src_data_size; + + switch (instr->Mask(SVEIntConvertToFPMask)) { + case SCVTF_z_p_z_h2fp16: + case UCVTF_z_p_z_h2fp16: + dst_data_size = kHRegSize; + src_data_size = kHRegSize; + break; + case SCVTF_z_p_z_w2d: + case UCVTF_z_p_z_w2d: + dst_data_size = kDRegSize; + src_data_size = kSRegSize; + break; + case SCVTF_z_p_z_w2fp16: + case UCVTF_z_p_z_w2fp16: + dst_data_size = kHRegSize; + src_data_size = kSRegSize; + break; + case SCVTF_z_p_z_w2s: + case UCVTF_z_p_z_w2s: + dst_data_size = kSRegSize; + src_data_size = kSRegSize; + break; + case SCVTF_z_p_z_x2d: + case UCVTF_z_p_z_x2d: + dst_data_size = kDRegSize; + src_data_size = kDRegSize; + break; + case SCVTF_z_p_z_x2fp16: + case UCVTF_z_p_z_x2fp16: + dst_data_size = kHRegSize; + src_data_size = kDRegSize; + break; + case SCVTF_z_p_z_x2s: + case UCVTF_z_p_z_x2s: + dst_data_size = kSRegSize; + src_data_size = kDRegSize; + break; + default: + VIXL_UNIMPLEMENTED(); + dst_data_size = 0; + src_data_size = 0; + break; + } + + VectorFormat vform = + SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size)); + + if (instr->ExtractBit(16) == 0) { + scvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding); + } else { + ucvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding); + } +} + +void Simulator::VisitSVEFPUnaryOpUnpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode()); + + switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) { + case FRECPE_z_z: + frecpe(vform, zd, zn, fpcr_rounding); + break; + case FRSQRTE_z_z: + frsqrte(vform, zd, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void 
Simulator::VisitSVEIncDecByPredicateCount(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(8, 5)); + + int count = CountActiveLanes(vform, pg); + + if (instr->ExtractBit(11) == 0) { + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + switch (instr->Mask(SVEIncDecByPredicateCountMask)) { + case DECP_z_p_z: + sub_uint(vform, zdn, zdn, count); + break; + case INCP_z_p_z: + add_uint(vform, zdn, zdn, count); + break; + case SQDECP_z_p_z: + sub_uint(vform, zdn, zdn, count).SignedSaturate(vform); + break; + case SQINCP_z_p_z: + add_uint(vform, zdn, zdn, count).SignedSaturate(vform); + break; + case UQDECP_z_p_z: + sub_uint(vform, zdn, zdn, count).UnsignedSaturate(vform); + break; + case UQINCP_z_p_z: + add_uint(vform, zdn, zdn, count).UnsignedSaturate(vform); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } else { + bool is_saturating = (instr->ExtractBit(18) == 0); + bool decrement = + is_saturating ? instr->ExtractBit(17) : instr->ExtractBit(16); + bool is_signed = (instr->ExtractBit(16) == 0); + bool sf = is_saturating ? (instr->ExtractBit(10) != 0) : true; + unsigned width = sf ? kXRegSize : kWRegSize; + + switch (instr->Mask(SVEIncDecByPredicateCountMask)) { + case DECP_r_p_r: + case INCP_r_p_r: + case SQDECP_r_p_r_sx: + case SQDECP_r_p_r_x: + case SQINCP_r_p_r_sx: + case SQINCP_r_p_r_x: + case UQDECP_r_p_r_uw: + case UQDECP_r_p_r_x: + case UQINCP_r_p_r_uw: + case UQINCP_r_p_r_x: + WriteXRegister(instr->GetRd(), + IncDecN(ReadXRegister(instr->GetRd()), + decrement ? -count : count, + width, + is_saturating, + is_signed)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } +} + +uint64_t Simulator::IncDecN(uint64_t acc, + int64_t delta, + unsigned n, + bool is_saturating, + bool is_signed) { + VIXL_ASSERT(n <= 64); + VIXL_ASSERT(IsIntN(n, delta)); + + uint64_t sign_mask = UINT64_C(1) << (n - 1); + uint64_t mask = GetUintMask(n); + + acc &= mask; // Ignore initial accumulator high bits. + uint64_t result = (acc + delta) & mask; + + bool result_negative = ((result & sign_mask) != 0); + + if (is_saturating) { + if (is_signed) { + bool acc_negative = ((acc & sign_mask) != 0); + bool delta_negative = delta < 0; + + // If the signs of the operands are the same, but different from the + // result, there was an overflow. + if ((acc_negative == delta_negative) && + (acc_negative != result_negative)) { + if (result_negative) { + // Saturate to [..., INT<n>_MAX]. + result_negative = false; + result = mask & ~sign_mask; // E.g. 0x000000007fffffff + } else { + // Saturate to [INT<n>_MIN, ...]. + result_negative = true; + result = ~mask | sign_mask; // E.g. 0xffffffff80000000 + } + } + } else { + if ((delta < 0) && (result > acc)) { + // Saturate to [0, ...]. + result = 0; + } else if ((delta > 0) && (result < acc)) { + // Saturate to [..., UINT<n>_MAX]. + result = mask; + } + } + } + + // Sign-extend if necessary. + if (result_negative && is_signed) result |= ~mask; + + return result; +} + +void Simulator::VisitSVEIndexGeneration(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + switch (instr->Mask(SVEIndexGenerationMask)) { + case INDEX_z_ii: + case INDEX_z_ir: + case INDEX_z_ri: + case INDEX_z_rr: { + uint64_t start = instr->ExtractBit(10) ? ReadXRegister(instr->GetRn()) + : instr->ExtractSignedBits(9, 5); + uint64_t step = instr->ExtractBit(11) ? 
ReadXRegister(instr->GetRm()) + : instr->ExtractSignedBits(20, 16); + index(vform, zd, start, step); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntArithmeticUnpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) { + case ADD_z_zz: + add(vform, zd, zn, zm); + break; + case SQADD_z_zz: + add(vform, zd, zn, zm).SignedSaturate(vform); + break; + case SQSUB_z_zz: + sub(vform, zd, zn, zm).SignedSaturate(vform); + break; + case SUB_z_zz: + sub(vform, zd, zn, zm); + break; + case UQADD_z_zz: + add(vform, zd, zn, zm).UnsignedSaturate(vform); + break; + case UQSUB_z_zz: + sub(vform, zd, zn, zm).UnsignedSaturate(vform); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntAddSubtractVectors_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) { + case ADD_z_p_zz: + add(vform, result, zdn, zm); + break; + case SUBR_z_p_zz: + sub(vform, result, zm, zdn); + break; + case SUB_z_p_zz: + sub(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEBitwiseLogical_Predicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) { + case AND_z_p_zz: + SVEBitwiseLogicalUnpredicatedHelper(AND, vform, result, zdn, zm); + break; + case BIC_z_p_zz: + SVEBitwiseLogicalUnpredicatedHelper(BIC, vform, result, zdn, zm); + break; + case EOR_z_p_zz: + SVEBitwiseLogicalUnpredicatedHelper(EOR, vform, result, zdn, zm); + break; + case ORR_z_p_zz: + SVEBitwiseLogicalUnpredicatedHelper(ORR, vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEIntMulVectors_Predicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) { + case MUL_z_p_zz: + mul(vform, result, zdn, zm); + break; + case SMULH_z_p_zz: + smulh(vform, result, zdn, zm); + break; + case UMULH_z_p_zz: + umulh(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEIntMinMaxDifference_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) { + case SABD_z_p_zz: + 
absdiff(vform, result, zdn, zm, true); + break; + case SMAX_z_p_zz: + smax(vform, result, zdn, zm); + break; + case SMIN_z_p_zz: + smin(vform, result, zdn, zm); + break; + case UABD_z_p_zz: + absdiff(vform, result, zdn, zm, false); + break; + case UMAX_z_p_zz: + umax(vform, result, zdn, zm); + break; + case UMIN_z_p_zz: + umin(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEIntMulImm_Unpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister scratch; + + switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) { + case MUL_z_zi: + dup_immediate(vform, scratch, instr->GetImmSVEIntWideSigned()); + mul(vform, zd, zd, scratch); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntDivideVectors_Predicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD)); + + switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) { + case SDIVR_z_p_zz: + sdiv(vform, result, zm, zdn); + break; + case SDIV_z_p_zz: + sdiv(vform, result, zdn, zm); + break; + case UDIVR_z_p_zz: + udiv(vform, result, zm, zdn); + break; + case UDIV_z_p_zz: + udiv(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEIntMinMaxImm_Unpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister scratch; + + uint64_t unsigned_imm = instr->GetImmSVEIntWideUnsigned(); + int64_t signed_imm = instr->GetImmSVEIntWideSigned(); + + switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) { + case SMAX_z_zi: + dup_immediate(vform, scratch, signed_imm); + smax(vform, zd, zd, scratch); + break; + case SMIN_z_zi: + dup_immediate(vform, scratch, signed_imm); + smin(vform, zd, zd, scratch); + break; + case UMAX_z_zi: + dup_immediate(vform, scratch, unsigned_imm); + umax(vform, zd, zd, scratch); + break; + case UMIN_z_zi: + dup_immediate(vform, scratch, unsigned_imm); + umin(vform, zd, zd, scratch); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntCompareScalarCountAndLimit( + const Instruction* instr) { + unsigned rn_code = instr->GetRn(); + unsigned rm_code = instr->GetRm(); + SimPRegister& pd = ReadPRegister(instr->GetPd()); + VectorFormat vform = instr->GetSVEVectorFormat(); + bool is_64_bit = instr->ExtractBit(12) == 1; + int64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code); + int64_t src2 = is_64_bit ? 
ReadXRegister(rm_code) : ReadWRegister(rm_code); + + bool last = true; + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + bool cond = false; + switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) { + case WHILELE_p_p_rr: + cond = src1 <= src2; + break; + case WHILELO_p_p_rr: + cond = static_cast<uint64_t>(src1) < static_cast<uint64_t>(src2); + break; + case WHILELS_p_p_rr: + cond = static_cast<uint64_t>(src1) <= static_cast<uint64_t>(src2); + break; + case WHILELT_p_p_rr: + cond = src1 < src2; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + last = last && cond; + LogicPRegister dst(pd); + dst.SetActive(vform, lane, last); + src1 += 1; + } + + PredTest(vform, GetPTrue(), pd); + LogSystemRegister(NZCV); +} + +void Simulator::VisitSVEConditionallyTerminateScalars( + const Instruction* instr) { + unsigned rn_code = instr->GetRn(); + unsigned rm_code = instr->GetRm(); + bool is_64_bit = instr->ExtractBit(22) == 1; + uint64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code); + uint64_t src2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code); + bool term; + switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) { + case CTERMEQ_rr: + term = src1 == src2; + break; + case CTERMNE_rr: + term = src1 != src2; + break; + default: + term = false; + VIXL_UNIMPLEMENTED(); + break; + } + ReadNzcv().SetN(term ? 1 : 0); + ReadNzcv().SetV(term ? 0 : !ReadC()); + LogSystemRegister(NZCV); +} + +void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) { + bool commute_inputs = false; + Condition cond; + switch (instr->Mask(SVEIntCompareSignedImmMask)) { + case CMPEQ_p_p_zi: + cond = eq; + break; + case CMPGE_p_p_zi: + cond = ge; + break; + case CMPGT_p_p_zi: + cond = gt; + break; + case CMPLE_p_p_zi: + cond = ge; + commute_inputs = true; + break; + case CMPLT_p_p_zi: + cond = gt; + commute_inputs = true; + break; + case CMPNE_p_p_zi: + cond = ne; + break; + default: + cond = al; + VIXL_UNIMPLEMENTED(); + break; + } + + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister src2; + dup_immediate(vform, + src2, + ExtractSignedBitfield64(4, 0, instr->ExtractBits(20, 16))); + SVEIntCompareVectorsHelper(cond, + vform, + ReadPRegister(instr->GetPd()), + ReadPRegister(instr->GetPgLow8()), + commute_inputs ? src2 + : ReadVRegister(instr->GetRn()), + commute_inputs ? ReadVRegister(instr->GetRn()) + : src2); +} + +void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) { + bool commute_inputs = false; + Condition cond; + switch (instr->Mask(SVEIntCompareUnsignedImmMask)) { + case CMPHI_p_p_zi: + cond = hi; + break; + case CMPHS_p_p_zi: + cond = hs; + break; + case CMPLO_p_p_zi: + cond = hi; + commute_inputs = true; + break; + case CMPLS_p_p_zi: + cond = hs; + commute_inputs = true; + break; + default: + cond = al; + VIXL_UNIMPLEMENTED(); + break; + } + + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister src2; + dup_immediate(vform, src2, instr->ExtractBits(20, 14)); + SVEIntCompareVectorsHelper(cond, + vform, + ReadPRegister(instr->GetPd()), + ReadPRegister(instr->GetPgLow8()), + commute_inputs ? src2 + : ReadVRegister(instr->GetRn()), + commute_inputs ? 
ReadVRegister(instr->GetRn()) + : src2); +} + +void Simulator::VisitSVEIntCompareVectors(const Instruction* instr) { + Instr op = instr->Mask(SVEIntCompareVectorsMask); + bool is_wide_elements = false; + switch (op) { + case CMPEQ_p_p_zw: + case CMPGE_p_p_zw: + case CMPGT_p_p_zw: + case CMPHI_p_p_zw: + case CMPHS_p_p_zw: + case CMPLE_p_p_zw: + case CMPLO_p_p_zw: + case CMPLS_p_p_zw: + case CMPLT_p_p_zw: + case CMPNE_p_p_zw: + is_wide_elements = true; + break; + } + + Condition cond; + switch (op) { + case CMPEQ_p_p_zw: + case CMPEQ_p_p_zz: + cond = eq; + break; + case CMPGE_p_p_zw: + case CMPGE_p_p_zz: + cond = ge; + break; + case CMPGT_p_p_zw: + case CMPGT_p_p_zz: + cond = gt; + break; + case CMPHI_p_p_zw: + case CMPHI_p_p_zz: + cond = hi; + break; + case CMPHS_p_p_zw: + case CMPHS_p_p_zz: + cond = hs; + break; + case CMPNE_p_p_zw: + case CMPNE_p_p_zz: + cond = ne; + break; + case CMPLE_p_p_zw: + cond = le; + break; + case CMPLO_p_p_zw: + cond = lo; + break; + case CMPLS_p_p_zw: + cond = ls; + break; + case CMPLT_p_p_zw: + cond = lt; + break; + default: + VIXL_UNIMPLEMENTED(); + cond = al; + break; + } + + SVEIntCompareVectorsHelper(cond, + instr->GetSVEVectorFormat(), + ReadPRegister(instr->GetPd()), + ReadPRegister(instr->GetPgLow8()), + ReadVRegister(instr->GetRn()), + ReadVRegister(instr->GetRm()), + is_wide_elements); +} + +void Simulator::VisitSVEFPExponentialAccelerator(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) || + (vform == kFormatVnD)); + + switch (instr->Mask(SVEFPExponentialAcceleratorMask)) { + case FEXPA_z_z: + fexpa(vform, zd, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEFPTrigSelectCoefficient(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) || + (vform == kFormatVnD)); + + switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) { + case FTSSEL_z_zz: + ftssel(vform, zd, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEConstructivePrefix_Unpredicated( + const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) { + case MOVPRFX_z_z: + mov(kFormatVnD, zd, zn); // The lane size is arbitrary. + // Record the movprfx, so the next ExecuteInstruction() can check it. + movprfx_ = instr; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntMulAddPredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + SimVRegister result; + switch (instr->Mask(SVEIntMulAddPredicatedMask)) { + case MLA_z_p_zzz: + mla(vform, result, zd, ReadVRegister(instr->GetRn()), zm); + break; + case MLS_z_p_zzz: + mls(vform, result, zd, ReadVRegister(instr->GetRn()), zm); + break; + case MAD_z_p_zzz: + // 'za' is encoded in 'Rn'. + mla(vform, result, ReadVRegister(instr->GetRn()), zd, zm); + break; + case MSB_z_p_zzz: { + // 'za' is encoded in 'Rn'. 
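+ // MSB computes zdn = za - zdn * zm, so relative to MLS (zda = zda -
+ // zn * zm) the accumulator and the multiplicand swap places in the
+ // mls() call below.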
+ mls(vform, result, ReadVRegister(instr->GetRn()), zd, zm); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zd, ReadPRegister(instr->GetPgLow8()), result); +} + +void Simulator::VisitSVEIntMulAddUnpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) { + case SDOT_z_zzz: + sdot(vform, zda, zn, zm); + break; + case UDOT_z_zzz: + udot(vform, zda, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEMovprfx(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + + switch (instr->Mask(SVEMovprfxMask)) { + case MOVPRFX_z_p_z: + if (instr->ExtractBit(16)) { + mov_merging(vform, zd, pg, zn); + } else { + mov_zeroing(vform, zd, pg, zn); + } + + // Record the movprfx, so the next ExecuteInstruction() can check it. + movprfx_ = instr; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntReduction(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& vd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) { + switch (instr->Mask(SVEIntReductionLogicalMask)) { + case ANDV_r_p_z: + andv(vform, vd, pg, zn); + break; + case EORV_r_p_z: + eorv(vform, vd, pg, zn); + break; + case ORV_r_p_z: + orv(vform, vd, pg, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } else { + switch (instr->Mask(SVEIntReductionMask)) { + case SADDV_r_p_z: + saddv(vform, vd, pg, zn); + break; + case SMAXV_r_p_z: + smaxv(vform, vd, pg, zn); + break; + case SMINV_r_p_z: + sminv(vform, vd, pg, zn); + break; + case UADDV_r_p_z: + uaddv(vform, vd, pg, zn); + break; + case UMAXV_r_p_z: + umaxv(vform, vd, pg, zn); + break; + case UMINV_r_p_z: + uminv(vform, vd, pg, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } +} + +void Simulator::VisitSVEIntUnaryArithmeticPredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + SimVRegister result; + switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) { + case ABS_z_p_z: + abs(vform, result, zn); + break; + case CLS_z_p_z: + cls(vform, result, zn); + break; + case CLZ_z_p_z: + clz(vform, result, zn); + break; + case CNOT_z_p_z: + cnot(vform, result, zn); + break; + case CNT_z_p_z: + cnt(vform, result, zn); + break; + case FABS_z_p_z: + fabs_(vform, result, zn); + break; + case FNEG_z_p_z: + fneg(vform, result, zn); + break; + case NEG_z_p_z: + neg(vform, result, zn); + break; + case NOT_z_p_z: + not_(vform, result, zn); + break; + case SXTB_z_p_z: + case SXTH_z_p_z: + case SXTW_z_p_z: + sxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17))); + break; + case UXTB_z_p_z: + case UXTH_z_p_z: + case UXTW_z_p_z: + uxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17))); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimPRegister& pg = 
ReadPRegister(instr->GetPgLow8()); + mov_merging(vform, zd, pg, result); +} + +void Simulator::VisitSVECopyFPImm_Predicated(const Instruction* instr) { + // There is only one instruction in this group. + VIXL_ASSERT(instr->Mask(SVECopyFPImm_PredicatedMask) == FCPY_z_p_i); + + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16)); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + + SimVRegister result; + switch (instr->Mask(SVECopyFPImm_PredicatedMask)) { + case FCPY_z_p_i: { + int imm8 = instr->ExtractBits(12, 5); + uint64_t value = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), + Instruction::Imm8ToFP64(imm8)); + dup_immediate(vform, result, value); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zd, pg, result); +} + +void Simulator::VisitSVEIntAddSubtractImm_Unpredicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister scratch; + + uint64_t imm = instr->GetImmSVEIntWideUnsigned(); + imm <<= instr->ExtractBit(13) * 8; + + switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) { + case ADD_z_zi: + add_uint(vform, zd, zd, imm); + break; + case SQADD_z_zi: + add_uint(vform, zd, zd, imm).SignedSaturate(vform); + break; + case SQSUB_z_zi: + sub_uint(vform, zd, zd, imm).SignedSaturate(vform); + break; + case SUBR_z_zi: + dup_immediate(vform, scratch, imm); + sub(vform, zd, scratch, zd); + break; + case SUB_z_zi: + sub_uint(vform, zd, zd, imm); + break; + case UQADD_z_zi: + add_uint(vform, zd, zd, imm).UnsignedSaturate(vform); + break; + case UQSUB_z_zi: + sub_uint(vform, zd, zd, imm).UnsignedSaturate(vform); + break; + default: + break; + } +} + +void Simulator::VisitSVEBroadcastIntImm_Unpredicated(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + + VectorFormat format = instr->GetSVEVectorFormat(); + int64_t imm = instr->GetImmSVEIntWideSigned(); + int shift = instr->ExtractBit(13) * 8; + imm *= 1 << shift; + + switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) { + case DUP_z_i: + // The encoding of byte-sized lanes with lsl #8 is undefined. + if ((format == kFormatVnB) && (shift == 8)) { + VIXL_UNIMPLEMENTED(); + } else { + dup_immediate(format, zd, imm); + } + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEBroadcastFPImm_Unpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + + switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) { + case FDUP_z_i: + switch (vform) { + case kFormatVnH: + dup_immediate(vform, zd, Float16ToRawbits(instr->GetSVEImmFP16())); + break; + case kFormatVnS: + dup_immediate(vform, zd, FloatToRawbits(instr->GetSVEImmFP32())); + break; + case kFormatVnD: + dup_immediate(vform, zd, DoubleToRawbits(instr->GetSVEImmFP64())); + break; + default: + VIXL_UNIMPLEMENTED(); + } + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) { + case LD1H_z_p_bz_s_x32_scaled: + case LD1SH_z_p_bz_s_x32_scaled: + case LDFF1H_z_p_bz_s_x32_scaled: + case LDFF1SH_z_p_bz_s_x32_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? 
SVE_SXTW : SVE_UXTW; + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod); +} + +void Simulator::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets( + const Instruction* instr) { + switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_s_x32_unscaled: + case LD1H_z_p_bz_s_x32_unscaled: + case LD1SB_z_p_bz_s_x32_unscaled: + case LD1SH_z_p_bz_s_x32_unscaled: + case LD1W_z_p_bz_s_x32_unscaled: + case LDFF1B_z_p_bz_s_x32_unscaled: + case LDFF1H_z_p_bz_s_x32_unscaled: + case LDFF1SB_z_p_bz_s_x32_unscaled: + case LDFF1SH_z_p_bz_s_x32_unscaled: + case LDFF1W_z_p_bz_s_x32_unscaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW; + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod); +} + +void Simulator::VisitSVE32BitGatherLoad_VectorPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) { + case LD1B_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LD1H_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LD1SB_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LD1SH_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LD1W_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LDFF1B_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LDFF1H_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LDFF1SB_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LDFF1SH_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LDFF1W_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets( + const Instruction* instr) { + switch ( + instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) { + case LD1W_z_p_bz_s_x32_scaled: + case LDFF1W_z_p_bz_s_x32_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW; + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod); +} + +void Simulator::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets( + const Instruction* instr) { + switch ( + instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_bz_s_x32_scaled: + case PRFD_i_p_bz_s_x32_scaled: + case PRFH_i_p_bz_s_x32_scaled: + case PRFW_i_p_bz_s_x32_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitGatherPrefetch_VectorPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_ai_s: + case PRFD_i_p_ai_s: + case PRFH_i_p_ai_s: + case PRFW_i_p_ai_s: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEContiguousPrefetch_ScalarPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_bi_s: + case PRFD_i_p_bi_s: + case PRFH_i_p_bi_s: + case PRFW_i_p_bi_s: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEContiguousPrefetch_ScalarPlusScalar( + const Instruction* instr) { + switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) { + // Ignore prefetch hint instructions. 
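+ // The simulator models no caches, so once the encoding (including the
+ // base and offset registers) has been checked, the prefetch is a no-op.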
+ case PRFB_i_p_br_s: + case PRFD_i_p_br_s: + case PRFH_i_p_br_s: + case PRFW_i_p_br_s: + if (instr->GetRm() == kZeroRegCode) { + VIXL_UNIMPLEMENTED(); + } + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVELoadAndBroadcastElement(const Instruction* instr) { + bool is_signed; + switch (instr->Mask(SVELoadAndBroadcastElementMask)) { + case LD1RB_z_p_bi_u8: + case LD1RB_z_p_bi_u16: + case LD1RB_z_p_bi_u32: + case LD1RB_z_p_bi_u64: + case LD1RH_z_p_bi_u16: + case LD1RH_z_p_bi_u32: + case LD1RH_z_p_bi_u64: + case LD1RW_z_p_bi_u32: + case LD1RW_z_p_bi_u64: + case LD1RD_z_p_bi_u64: + is_signed = false; + break; + case LD1RSB_z_p_bi_s16: + case LD1RSB_z_p_bi_s32: + case LD1RSB_z_p_bi_s64: + case LD1RSH_z_p_bi_s32: + case LD1RSH_z_p_bi_s64: + case LD1RSW_z_p_bi_s64: + is_signed = true; + break; + default: + // This encoding group is complete, so no other values should be possible. + VIXL_UNREACHABLE(); + is_signed = false; + break; + } + + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed, 13); + VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + uint64_t offset = instr->ExtractBits(21, 16) << msize_in_bytes_log2; + uint64_t base = ReadXRegister(instr->GetRn()) + offset; + VectorFormat unpack_vform = + SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2); + SimVRegister temp; + ld1r(vform, unpack_vform, temp, base, is_signed); + mov_zeroing(vform, + ReadVRegister(instr->GetRt()), + ReadPRegister(instr->GetPgLow8()), + temp); +} + +void Simulator::VisitSVELoadPredicateRegister(const Instruction* instr) { + switch (instr->Mask(SVELoadPredicateRegisterMask)) { + case LDR_p_bi: { + SimPRegister& pt = ReadPRegister(instr->GetPt()); + int pl = GetPredicateLengthInBytes(); + int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); + uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9); + uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl; + for (int i = 0; i < pl; i++) { + pt.Insert(i, Memory::Read<uint8_t>(address + i)); + } + LogPRead(instr->GetPt(), address); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) { + switch (instr->Mask(SVELoadVectorRegisterMask)) { + case LDR_z_bi: { + SimVRegister& zt = ReadVRegister(instr->GetRt()); + int vl = GetVectorLengthInBytes(); + int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); + uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9); + uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl; + for (int i = 0; i < vl; i++) { + zt.Insert(i, Memory::Read<uint8_t>(address + i)); + } + LogZRead(instr->GetRt(), address); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) { + case LD1D_z_p_bz_d_x32_scaled: + case LD1H_z_p_bz_d_x32_scaled: + case LD1SH_z_p_bz_d_x32_scaled: + case LD1SW_z_p_bz_d_x32_scaled: + case LD1W_z_p_bz_d_x32_scaled: + case LDFF1H_z_p_bz_d_x32_scaled: + case LDFF1W_z_p_bz_d_x32_scaled: + case LDFF1D_z_p_bz_d_x32_scaled: + case LDFF1SH_z_p_bz_d_x32_scaled: + case LDFF1SW_z_p_bz_d_x32_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEOffsetModifier mod = 
(instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW; + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod); +} + +void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) { + case LD1D_z_p_bz_d_64_scaled: + case LD1H_z_p_bz_d_64_scaled: + case LD1SH_z_p_bz_d_64_scaled: + case LD1SW_z_p_bz_d_64_scaled: + case LD1W_z_p_bz_d_64_scaled: + case LDFF1H_z_p_bz_d_64_scaled: + case LDFF1W_z_p_bz_d_64_scaled: + case LDFF1D_z_p_bz_d_64_scaled: + case LDFF1SH_z_p_bz_d_64_scaled: + case LDFF1SW_z_p_bz_d_64_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, SVE_LSL); +} + +void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets( + const Instruction* instr) { + switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_d_64_unscaled: + case LD1D_z_p_bz_d_64_unscaled: + case LD1H_z_p_bz_d_64_unscaled: + case LD1SB_z_p_bz_d_64_unscaled: + case LD1SH_z_p_bz_d_64_unscaled: + case LD1SW_z_p_bz_d_64_unscaled: + case LD1W_z_p_bz_d_64_unscaled: + case LDFF1B_z_p_bz_d_64_unscaled: + case LDFF1D_z_p_bz_d_64_unscaled: + case LDFF1H_z_p_bz_d_64_unscaled: + case LDFF1SB_z_p_bz_d_64_unscaled: + case LDFF1SH_z_p_bz_d_64_unscaled: + case LDFF1SW_z_p_bz_d_64_unscaled: + case LDFF1W_z_p_bz_d_64_unscaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEGatherLoadScalarPlusVectorHelper(instr, + kFormatVnD, + NO_SVE_OFFSET_MODIFIER); +} + +void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_d_x32_unscaled: + case LD1D_z_p_bz_d_x32_unscaled: + case LD1H_z_p_bz_d_x32_unscaled: + case LD1SB_z_p_bz_d_x32_unscaled: + case LD1SH_z_p_bz_d_x32_unscaled: + case LD1SW_z_p_bz_d_x32_unscaled: + case LD1W_z_p_bz_d_x32_unscaled: + case LDFF1B_z_p_bz_d_x32_unscaled: + case LDFF1H_z_p_bz_d_x32_unscaled: + case LDFF1W_z_p_bz_d_x32_unscaled: + case LDFF1D_z_p_bz_d_x32_unscaled: + case LDFF1SB_z_p_bz_d_x32_unscaled: + case LDFF1SH_z_p_bz_d_x32_unscaled: + case LDFF1SW_z_p_bz_d_x32_unscaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW; + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod); +} + +void Simulator::VisitSVE64BitGatherLoad_VectorPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) { + case LD1B_z_p_ai_d: + case LD1D_z_p_ai_d: + case LD1H_z_p_ai_d: + case LD1SB_z_p_ai_d: + case LD1SH_z_p_ai_d: + case LD1SW_z_p_ai_d: + case LD1W_z_p_ai_d: + case LDFF1B_z_p_ai_d: + case LDFF1D_z_p_ai_d: + case LDFF1H_z_p_ai_d: + case LDFF1SB_z_p_ai_d: + case LDFF1SH_z_p_ai_d: + case LDFF1SW_z_p_ai_d: + case LDFF1W_z_p_ai_d: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + bool is_signed = instr->ExtractBit(14) == 0; + bool is_ff = instr->ExtractBit(13) == 1; + // Note that these instructions don't use the Dtype encoding. 
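+ // Instead, the access size comes directly from bits 24:23, and the
+ // five-bit immediate in bits 20:16 is scaled by that size below.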
+ int msize_in_bytes_log2 = instr->ExtractBits(24, 23); + uint64_t imm = instr->ExtractBits(20, 16) << msize_in_bytes_log2; + LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + if (is_ff) { + VIXL_UNIMPLEMENTED(); + } else { + SVEStructuredLoadHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + is_signed); + } +} + +void Simulator::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets( + const Instruction* instr) { + switch ( + instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_bz_d_64_scaled: + case PRFD_i_p_bz_d_64_scaled: + case PRFH_i_p_bz_d_64_scaled: + case PRFW_i_p_bz_d_64_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator:: + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_bz_d_x32_scaled: + case PRFD_i_p_bz_d_x32_scaled: + case PRFH_i_p_bz_d_x32_scaled: + case PRFW_i_p_bz_d_x32_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE64BitGatherPrefetch_VectorPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_ai_d: + case PRFD_i_p_ai_d: + case PRFH_i_p_ai_d: + case PRFW_i_p_ai_d: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar( + const Instruction* instr) { + bool is_signed; + switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) { + case LDFF1B_z_p_br_u8: + case LDFF1B_z_p_br_u16: + case LDFF1B_z_p_br_u32: + case LDFF1B_z_p_br_u64: + case LDFF1H_z_p_br_u16: + case LDFF1H_z_p_br_u32: + case LDFF1H_z_p_br_u64: + case LDFF1W_z_p_br_u32: + case LDFF1W_z_p_br_u64: + case LDFF1D_z_p_br_u64: + is_signed = false; + break; + case LDFF1SB_z_p_br_s16: + case LDFF1SB_z_p_br_s32: + case LDFF1SB_z_p_br_s64: + case LDFF1SH_z_p_br_s32: + case LDFF1SH_z_p_br_s64: + case LDFF1SW_z_p_br_s64: + is_signed = true; + break; + default: + // This encoding group is complete, so no other values should be possible. 
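+ // Hence VIXL_UNREACHABLE() rather than VIXL_UNIMPLEMENTED(): reaching
+ // this default would indicate a decoder bug, not a missing feature.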
+ VIXL_UNREACHABLE(); + is_signed = false; + break; + } + + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); + VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + uint64_t offset = ReadXRegister(instr->GetRm()); + offset <<= msize_in_bytes_log2; + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEFaultTolerantLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + kSVEFirstFaultLoad, + is_signed); +} + +void Simulator::VisitSVEContiguousNonFaultLoad_ScalarPlusImm( + const Instruction* instr) { + bool is_signed = false; + switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) { + case LDNF1B_z_p_bi_u16: + case LDNF1B_z_p_bi_u32: + case LDNF1B_z_p_bi_u64: + case LDNF1B_z_p_bi_u8: + case LDNF1D_z_p_bi_u64: + case LDNF1H_z_p_bi_u16: + case LDNF1H_z_p_bi_u32: + case LDNF1H_z_p_bi_u64: + case LDNF1W_z_p_bi_u32: + case LDNF1W_z_p_bi_u64: + break; + case LDNF1SB_z_p_bi_s16: + case LDNF1SB_z_p_bi_s32: + case LDNF1SB_z_p_bi_s64: + case LDNF1SH_z_p_bi_s32: + case LDNF1SH_z_p_bi_s64: + case LDNF1SW_z_p_bi_s64: + is_signed = true; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); + VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + int vl = GetVectorLengthInBytes(); + int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2; + uint64_t offset = + (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2); + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEFaultTolerantLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + kSVENonFaultLoad, + is_signed); +} + +void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm( + const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = kFormatUndefined; + + switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) { + case LDNT1B_z_p_bi_contiguous: + vform = kFormatVnB; + break; + case LDNT1D_z_p_bi_contiguous: + vform = kFormatVnD; + break; + case LDNT1H_z_p_bi_contiguous: + vform = kFormatVnH; + break; + case LDNT1W_z_p_bi_contiguous: + vform = kFormatVnS; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + int vl = GetVectorLengthInBytes(); + uint64_t offset = instr->ExtractSignedBits(19, 16) * vl; + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredLoadHelper(vform, + pg, + instr->GetRt(), + addr, + /* is_signed = */ false); +} + +void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar( + const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = kFormatUndefined; + + switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) { + case LDNT1B_z_p_br_contiguous: + vform = kFormatVnB; + break; + case LDNT1D_z_p_br_contiguous: + vform = kFormatVnD; + break; + case LDNT1H_z_p_br_contiguous: + vform = kFormatVnH; + break; + case LDNT1W_z_p_br_contiguous: + vform = 
kFormatVnS; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2; + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredLoadHelper(vform, + pg, + instr->GetRt(), + addr, + /* is_signed = */ false); +} + +void Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm( + const Instruction* instr) { + SimVRegister& zt = ReadVRegister(instr->GetRt()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer); + uint64_t offset = instr->ExtractSignedBits(19, 16) * 16; + + VectorFormat vform = kFormatUndefined; + switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) { + case LD1RQB_z_p_bi_u8: + vform = kFormatVnB; + break; + case LD1RQD_z_p_bi_u64: + vform = kFormatVnD; + break; + case LD1RQH_z_p_bi_u16: + vform = kFormatVnH; + break; + case LD1RQW_z_p_bi_u32: + vform = kFormatVnS; + break; + default: + addr = offset = 0; + break; + } + ld1(kFormat16B, zt, addr + offset); + mov_zeroing(vform, zt, pg, zt); + dup_element(kFormatVnQ, zt, zt, 0); +} + +void Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar( + const Instruction* instr) { + SimVRegister& zt = ReadVRegister(instr->GetRt()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer); + uint64_t offset = ReadXRegister(instr->GetRm()); + + VectorFormat vform = kFormatUndefined; + switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) { + case LD1RQB_z_p_br_contiguous: + vform = kFormatVnB; + break; + case LD1RQD_z_p_br_contiguous: + vform = kFormatVnD; + offset <<= 3; + break; + case LD1RQH_z_p_br_contiguous: + vform = kFormatVnH; + offset <<= 1; + break; + case LD1RQW_z_p_br_contiguous: + vform = kFormatVnS; + offset <<= 2; + break; + default: + addr = offset = 0; + break; + } + ld1(kFormat16B, zt, addr + offset); + mov_zeroing(vform, zt, pg, zt); + dup_element(kFormatVnQ, zt, zt, 0); +} + +void Simulator::VisitSVELoadMultipleStructures_ScalarPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) { + case LD2B_z_p_bi_contiguous: + case LD2D_z_p_bi_contiguous: + case LD2H_z_p_bi_contiguous: + case LD2W_z_p_bi_contiguous: + case LD3B_z_p_bi_contiguous: + case LD3D_z_p_bi_contiguous: + case LD3H_z_p_bi_contiguous: + case LD3W_z_p_bi_contiguous: + case LD4B_z_p_bi_contiguous: + case LD4D_z_p_bi_contiguous: + case LD4H_z_p_bi_contiguous: + case LD4W_z_p_bi_contiguous: { + int vl = GetVectorLengthInBytes(); + int msz = instr->ExtractBits(24, 23); + int reg_count = instr->ExtractBits(22, 21) + 1; + uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count; + LogicSVEAddressVector addr( + ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); + addr.SetMsizeInBytesLog2(msz); + addr.SetRegCount(reg_count); + SVEStructuredLoadHelper(SVEFormatFromLaneSizeInBytesLog2(msz), + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVELoadMultipleStructures_ScalarPlusScalar( + const Instruction* instr) { + switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) { + case LD2B_z_p_br_contiguous: + case LD2D_z_p_br_contiguous: + case LD2H_z_p_br_contiguous: + case LD2W_z_p_br_contiguous: 
+ case LD3B_z_p_br_contiguous: + case LD3D_z_p_br_contiguous: + case LD3H_z_p_br_contiguous: + case LD3W_z_p_br_contiguous: + case LD4B_z_p_br_contiguous: + case LD4D_z_p_br_contiguous: + case LD4H_z_p_br_contiguous: + case LD4W_z_p_br_contiguous: { + int msz = instr->ExtractBits(24, 23); + uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz); + LogicSVEAddressVector addr( + ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); + addr.SetMsizeInBytesLog2(msz); + addr.SetRegCount(instr->ExtractBits(22, 21) + 1); + SVEStructuredLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + false); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) { + case ST1H_z_p_bz_s_x32_scaled: + case ST1W_z_p_bz_s_x32_scaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + int scale = instr->ExtractBit(21) * msize_in_bytes_log2; + uint64_t base = ReadXRegister(instr->GetRn()); + SVEOffsetModifier mod = + (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW; + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnS, + mod, + scale); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnS, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets( + const Instruction* instr) { + switch ( + instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_s_x32_unscaled: + case ST1H_z_p_bz_s_x32_unscaled: + case ST1W_z_p_bz_s_x32_unscaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + uint64_t base = ReadXRegister(instr->GetRn()); + SVEOffsetModifier mod = + (instr->ExtractBit(14) == 1) ? 
SVE_SXTW : SVE_UXTW; + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnS, + mod); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnS, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitScatterStore_VectorPlusImm( + const Instruction* instr) { + int msz = 0; + switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) { + case ST1B_z_p_ai_s: + msz = 0; + break; + case ST1H_z_p_ai_s: + msz = 1; + break; + case ST1W_z_p_ai_s: + msz = 2; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + uint64_t imm = instr->ExtractBits(20, 16) << msz; + LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnS); + addr.SetMsizeInBytesLog2(msz); + SVEStructuredStoreHelper(kFormatVnS, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); +} + +void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) { + case ST1D_z_p_bz_d_64_scaled: + case ST1H_z_p_bz_d_64_scaled: + case ST1W_z_p_bz_d_64_scaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + int scale = instr->ExtractBit(21) * msize_in_bytes_log2; + uint64_t base = ReadXRegister(instr->GetRn()); + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnD, + SVE_LSL, + scale); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets( + const Instruction* instr) { + switch ( + instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_d_64_unscaled: + case ST1D_z_p_bz_d_64_unscaled: + case ST1H_z_p_bz_d_64_unscaled: + case ST1W_z_p_bz_d_64_unscaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + uint64_t base = ReadXRegister(instr->GetRn()); + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnD, + NO_SVE_OFFSET_MODIFIER); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) { + case ST1D_z_p_bz_d_x32_scaled: + case ST1H_z_p_bz_d_x32_scaled: + case ST1W_z_p_bz_d_x32_scaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + int scale = instr->ExtractBit(21) * msize_in_bytes_log2; + uint64_t base = ReadXRegister(instr->GetRn()); + SVEOffsetModifier mod = + (instr->ExtractBit(14) == 1) ? 
SVE_SXTW : SVE_UXTW; + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnD, + mod, + scale); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator:: + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_d_x32_unscaled: + case ST1D_z_p_bz_d_x32_unscaled: + case ST1H_z_p_bz_d_x32_unscaled: + case ST1W_z_p_bz_d_x32_unscaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + uint64_t base = ReadXRegister(instr->GetRn()); + SVEOffsetModifier mod = + (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW; + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnD, + mod); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE64BitScatterStore_VectorPlusImm( + const Instruction* instr) { + int msz = 0; + switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) { + case ST1B_z_p_ai_d: + msz = 0; + break; + case ST1D_z_p_ai_d: + msz = 3; + break; + case ST1H_z_p_ai_d: + msz = 1; + break; + case ST1W_z_p_ai_d: + msz = 2; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + uint64_t imm = instr->ExtractBits(20, 16) << msz; + LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD); + addr.SetMsizeInBytesLog2(msz); + SVEStructuredStoreHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); +} + +void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusImm( + const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = kFormatUndefined; + + switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) { + case STNT1B_z_p_bi_contiguous: + vform = kFormatVnB; + break; + case STNT1D_z_p_bi_contiguous: + vform = kFormatVnD; + break; + case STNT1H_z_p_bi_contiguous: + vform = kFormatVnH; + break; + case STNT1W_z_p_bi_contiguous: + vform = kFormatVnS; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + int vl = GetVectorLengthInBytes(); + uint64_t offset = instr->ExtractSignedBits(19, 16) * vl; + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr); +} + +void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar( + const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = kFormatUndefined; + + switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) { + case STNT1B_z_p_br_contiguous: + vform = kFormatVnB; + break; + case STNT1D_z_p_br_contiguous: + vform = kFormatVnD; + break; + case STNT1H_z_p_br_contiguous: + vform = kFormatVnH; + break; + case STNT1W_z_p_br_contiguous: + vform = kFormatVnS; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2; 
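+ // With no cache model in the simulator, the non-temporal hint is
+ // ignored and STNT1 behaves like an ordinary contiguous store.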
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr); +} + +void Simulator::VisitSVEContiguousStore_ScalarPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) { + case ST1B_z_p_bi: + case ST1D_z_p_bi: + case ST1H_z_p_bi: + case ST1W_z_p_bi: { + int vl = GetVectorLengthInBytes(); + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(false); + VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); + int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2; + uint64_t offset = + (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2); + VectorFormat vform = + SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEContiguousStore_ScalarPlusScalar( + const Instruction* instr) { + switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) { + case ST1B_z_p_br: + case ST1D_z_p_br: + case ST1H_z_p_br: + case ST1W_z_p_br: { + uint64_t offset = ReadXRegister(instr->GetRm()); + offset <<= instr->ExtractBits(24, 23); + VectorFormat vform = + SVEFormatFromLaneSizeInBytesLog2(instr->ExtractBits(22, 21)); + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(instr->ExtractBits(24, 23)); + SVEStructuredStoreHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVECopySIMDFPScalarRegisterToVector_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister z_result; + + switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) { + case CPY_z_p_v: + dup_element(vform, z_result, ReadVRegister(instr->GetRn()), 0); + mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) { + case ST2B_z_p_bi_contiguous: + case ST2D_z_p_bi_contiguous: + case ST2H_z_p_bi_contiguous: + case ST2W_z_p_bi_contiguous: + case ST3B_z_p_bi_contiguous: + case ST3D_z_p_bi_contiguous: + case ST3H_z_p_bi_contiguous: + case ST3W_z_p_bi_contiguous: + case ST4B_z_p_bi_contiguous: + case ST4D_z_p_bi_contiguous: + case ST4H_z_p_bi_contiguous: + case ST4W_z_p_bi_contiguous: { + int vl = GetVectorLengthInBytes(); + int msz = instr->ExtractBits(24, 23); + int reg_count = instr->ExtractBits(22, 21) + 1; + uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count; + LogicSVEAddressVector addr( + ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); + addr.SetMsizeInBytesLog2(msz); + addr.SetRegCount(reg_count); + SVEStructuredStoreHelper(SVEFormatFromLaneSizeInBytesLog2(msz), + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusScalar( + const 
Instruction* instr) { + switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) { + case ST2B_z_p_br_contiguous: + case ST2D_z_p_br_contiguous: + case ST2H_z_p_br_contiguous: + case ST2W_z_p_br_contiguous: + case ST3B_z_p_br_contiguous: + case ST3D_z_p_br_contiguous: + case ST3H_z_p_br_contiguous: + case ST3W_z_p_br_contiguous: + case ST4B_z_p_br_contiguous: + case ST4D_z_p_br_contiguous: + case ST4H_z_p_br_contiguous: + case ST4W_z_p_br_contiguous: { + int msz = instr->ExtractBits(24, 23); + uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz); + LogicSVEAddressVector addr( + ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); + addr.SetMsizeInBytesLog2(msz); + addr.SetRegCount(instr->ExtractBits(22, 21) + 1); + SVEStructuredStoreHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEStorePredicateRegister(const Instruction* instr) { + switch (instr->Mask(SVEStorePredicateRegisterMask)) { + case STR_p_bi: { + SimPRegister& pt = ReadPRegister(instr->GetPt()); + int pl = GetPredicateLengthInBytes(); + int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); + uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9); + uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl; + for (int i = 0; i < pl; i++) { + Memory::Write(address + i, pt.GetLane<uint8_t>(i)); + } + LogPWrite(instr->GetPt(), address); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEStoreVectorRegister(const Instruction* instr) { + switch (instr->Mask(SVEStoreVectorRegisterMask)) { + case STR_z_bi: { + SimVRegister& zt = ReadVRegister(instr->GetRt()); + int vl = GetVectorLengthInBytes(); + int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); + uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9); + uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl; + for (int i = 0; i < vl; i++) { + Memory::Write(address + i, zt.GetLane<uint8_t>(i)); + } + LogZWrite(instr->GetRt(), address); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEMulIndex(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + switch (instr->Mask(SVEMulIndexMask)) { + case SDOT_z_zzzi_d: + sdot(vform, + zda, + zn, + ReadVRegister(instr->ExtractBits(19, 16)), + instr->ExtractBit(20)); + break; + case SDOT_z_zzzi_s: + sdot(vform, + zda, + zn, + ReadVRegister(instr->ExtractBits(18, 16)), + instr->ExtractBits(20, 19)); + break; + case UDOT_z_zzzi_d: + udot(vform, + zda, + zn, + ReadVRegister(instr->ExtractBits(19, 16)), + instr->ExtractBit(20)); + break; + case UDOT_z_zzzi_s: + udot(vform, + zda, + zn, + ReadVRegister(instr->ExtractBits(18, 16)), + instr->ExtractBits(20, 19)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPartitionBreakCondition(const Instruction* instr) { + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + SimPRegister result; + + switch (instr->Mask(SVEPartitionBreakConditionMask)) { + case BRKAS_p_p_p_z: + case BRKA_p_p_p: + brka(result, pg, pn); + break; + case BRKBS_p_p_p_z: + case BRKB_p_p_p: + 
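// BRKA sets result elements up to and including the first active true
+ // element of pn, while BRKB stops just before it, so for BRKB the
+ // matching element itself becomes false.
+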
brkb(result, pg, pn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (instr->ExtractBit(4) == 1) { + mov_merging(pd, pg, result); + } else { + mov_zeroing(pd, pg, result); + } + + // Set flag if needed. + if (instr->ExtractBit(22) == 1) { + PredTest(kFormatVnB, pg, pd); + } +} + +void Simulator::VisitSVEPropagateBreakToNextPartition( + const Instruction* instr) { + SimPRegister& pdm = ReadPRegister(instr->GetPd()); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + + switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) { + case BRKNS_p_p_pp: + case BRKN_p_p_pp: + brkn(pdm, pg, pn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + // Set flag if needed. + if (instr->ExtractBit(22) == 1) { + // Note that this ignores `pg`. + PredTest(kFormatVnB, GetPTrue(), pdm); + } +} + +void Simulator::VisitSVEUnpackPredicateElements(const Instruction* instr) { + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + + SimVRegister temp = Simulator::ExpandToSimVRegister(pn); + SimVRegister zero; + dup_immediate(kFormatVnB, zero, 0); + + switch (instr->Mask(SVEUnpackPredicateElementsMask)) { + case PUNPKHI_p_p: + zip2(kFormatVnB, temp, temp, zero); + break; + case PUNPKLO_p_p: + zip1(kFormatVnB, temp, temp, zero); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp); +} + +void Simulator::VisitSVEPermutePredicateElements(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + SimPRegister& pm = ReadPRegister(instr->GetPm()); + + SimVRegister temp0 = Simulator::ExpandToSimVRegister(pn); + SimVRegister temp1 = Simulator::ExpandToSimVRegister(pm); + + switch (instr->Mask(SVEPermutePredicateElementsMask)) { + case TRN1_p_pp: + trn1(vform, temp0, temp0, temp1); + break; + case TRN2_p_pp: + trn2(vform, temp0, temp0, temp1); + break; + case UZP1_p_pp: + uzp1(vform, temp0, temp0, temp1); + break; + case UZP2_p_pp: + uzp2(vform, temp0, temp0, temp1); + break; + case ZIP1_p_pp: + zip1(vform, temp0, temp0, temp1); + break; + case ZIP2_p_pp: + zip2(vform, temp0, temp0, temp1); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp0); +} + +void Simulator::VisitSVEReversePredicateElements(const Instruction* instr) { + switch (instr->Mask(SVEReversePredicateElementsMask)) { + case REV_p_p: { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimVRegister temp = Simulator::ExpandToSimVRegister(pn); + rev(vform, temp, temp); + Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPermuteVectorExtract(const Instruction* instr) { + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + // Second source register "Zm" is encoded where "Zn" would usually be. + SimVRegister& zm = ReadVRegister(instr->GetRn()); + + const int imm8h_mask = 0x001F0000; + const int imm8l_mask = 0x00001C00; + int index = instr->ExtractBits<imm8h_mask | imm8l_mask>(); + int vl = GetVectorLengthInBytes(); + index = (index >= vl) ? 
0 : index; + + switch (instr->Mask(SVEPermuteVectorExtractMask)) { + case EXT_z_zi_des: + ext(kFormatVnB, zdn, zdn, zm, index); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPermuteVectorInterleaving(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + switch (instr->Mask(SVEPermuteVectorInterleavingMask)) { + case TRN1_z_zz: + trn1(vform, zd, zn, zm); + break; + case TRN2_z_zz: + trn2(vform, zd, zn, zm); + break; + case UZP1_z_zz: + uzp1(vform, zd, zn, zm); + break; + case UZP2_z_zz: + uzp2(vform, zd, zn, zm); + break; + case ZIP1_z_zz: + zip1(vform, zd, zn, zm); + break; + case ZIP2_z_zz: + zip2(vform, zd, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEConditionallyBroadcastElementToVector( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + int active_offset = -1; + switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) { + case CLASTA_z_p_zz: + active_offset = 1; + break; + case CLASTB_z_p_zz: + active_offset = 0; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (active_offset >= 0) { + std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); + if (value.first) { + dup_immediate(vform, zdn, value.second); + } else { + // Trigger a line of trace for the operation, even though it doesn't + // change the register value. + mov(vform, zdn, zdn); + } + } +} + +void Simulator::VisitSVEConditionallyExtractElementToSIMDFPScalar( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& vdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + int active_offset = -1; + switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) { + case CLASTA_v_p_z: + active_offset = 1; + break; + case CLASTB_v_p_z: + active_offset = 0; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (active_offset >= 0) { + LogicVRegister dst(vdn); + uint64_t src1_value = dst.Uint(vform, 0); + std::pair<bool, uint64_t> src2_value = clast(vform, pg, zm, active_offset); + dup_immediate(vform, vdn, 0); + dst.SetUint(vform, 0, src2_value.first ? src2_value.second : src1_value); + } +} + +void Simulator::VisitSVEConditionallyExtractElementToGeneralRegister( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + int active_offset = -1; + switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) { + case CLASTA_r_p_z: + active_offset = 1; + break; + case CLASTB_r_p_z: + active_offset = 0; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (active_offset >= 0) { + std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); + uint64_t masked_src = ReadXRegister(instr->GetRd()) & + GetUintMask(LaneSizeInBitsFromFormat(vform)); + WriteXRegister(instr->GetRd(), value.first ? 
value.second : masked_src); + } +} + +void Simulator::VisitSVEExtractElementToSIMDFPScalarRegister( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& vdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + int active_offset = -1; + switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) { + case LASTA_v_p_z: + active_offset = 1; + break; + case LASTB_v_p_z: + active_offset = 0; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (active_offset >= 0) { + LogicVRegister dst(vdn); + std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); + dup_immediate(vform, vdn, 0); + dst.SetUint(vform, 0, value.second); + } +} + +void Simulator::VisitSVEExtractElementToGeneralRegister( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + int active_offset = -1; + switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) { + case LASTA_r_p_z: + active_offset = 1; + break; + case LASTB_r_p_z: + active_offset = 0; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (active_offset >= 0) { + std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); + WriteXRegister(instr->GetRd(), value.second); + } +} + +void Simulator::VisitSVECompressActiveElements(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + switch (instr->Mask(SVECompressActiveElementsMask)) { + case COMPACT_z_p_z: + compact(vform, zd, pg, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVECopyGeneralRegisterToVector_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister z_result; + + switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) { + case CPY_z_p_r: + dup_immediate(vform, + z_result, + ReadXRegister(instr->GetRn(), Reg31IsStackPointer)); + mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVECopyIntImm_Predicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16)); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + + SimVRegister result; + switch (instr->Mask(SVECopyIntImm_PredicatedMask)) { + case CPY_z_p_i: { + // Use unsigned arithmetic to avoid undefined behaviour during the shift. 
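// Editorial note (not part of the patch): a worked example of the comment
// above. For `cpy z0.h, p0/z, #-1, lsl #8`, GetImmSVEIntWideSigned() returns
// -1, which the uint64_t below holds as 0xffffffffffffffff; bit 13 is set, so
// the unsigned left shift by 8 gives 0x...ff00, and each H lane receives
// 0xff00. Left-shifting a negative signed value instead would be undefined
// behaviour.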
+      uint64_t imm8 = instr->GetImmSVEIntWideSigned();
+      dup_immediate(vform, result, imm8 << (instr->ExtractBit(13) * 8));
+      break;
+    }
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+
+  if (instr->ExtractBit(14) != 0) {
+    mov_merging(vform, zd, pg, result);
+  } else {
+    mov_zeroing(vform, zd, pg, result);
+  }
+}
+
+void Simulator::VisitSVEReverseWithinElements(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+  SimVRegister result;
+
+  // In NEON, the chunk size in which elements are REVersed is in the
+  // instruction mnemonic, and the element size is attached to the register.
+  // SVE reverses the semantics; the mapping to logic functions below is to
+  // account for this.
+  VectorFormat chunk_form = instr->GetSVEVectorFormat();
+  VectorFormat element_form = kFormatUndefined;
+
+  switch (instr->Mask(SVEReverseWithinElementsMask)) {
+    case RBIT_z_p_z:
+      rbit(chunk_form, result, zn);
+      break;
+    case REVB_z_z:
+      VIXL_ASSERT((chunk_form == kFormatVnH) || (chunk_form == kFormatVnS) ||
+                  (chunk_form == kFormatVnD));
+      element_form = kFormatVnB;
+      break;
+    case REVH_z_z:
+      VIXL_ASSERT((chunk_form == kFormatVnS) || (chunk_form == kFormatVnD));
+      element_form = kFormatVnH;
+      break;
+    case REVW_z_z:
+      VIXL_ASSERT(chunk_form == kFormatVnD);
+      element_form = kFormatVnS;
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+
+  if (instr->Mask(SVEReverseWithinElementsMask) != RBIT_z_p_z) {
+    VIXL_ASSERT(element_form != kFormatUndefined);
+    switch (chunk_form) {
+      case kFormatVnH:
+        rev16(element_form, result, zn);
+        break;
+      case kFormatVnS:
+        rev32(element_form, result, zn);
+        break;
+      case kFormatVnD:
+        rev64(element_form, result, zn);
+        break;
+      default:
+        VIXL_UNIMPLEMENTED();
+    }
+  }
+
+  mov_merging(chunk_form, zd, pg, result);
+}
+
+void Simulator::VisitSVEVectorSplice_Destructive(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zdn = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRn());
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+  switch (instr->Mask(SVEVectorSplice_DestructiveMask)) {
+    case SPLICE_z_p_zz_des:
+      splice(vform, zdn, pg, zdn, zm);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEBroadcastGeneralRegister(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) {
+    case DUP_z_r:
+      dup_immediate(instr->GetSVEVectorFormat(),
+                    zd,
+                    ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEInsertSIMDFPScalarRegister(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) {
+    case INSR_z_v:
+      insr(vform, zd, ReadDRegisterBits(instr->GetRn()));
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEInsertGeneralRegister(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  switch (instr->Mask(SVEInsertGeneralRegisterMask)) {
+    case INSR_z_r:
+      insr(vform, zd, ReadXRegister(instr->GetRn()));
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEBroadcastIndexElement(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd()); + switch (instr->Mask(SVEBroadcastIndexElementMask)) { + case DUP_z_zi: { + std::pair<int, int> index_and_lane_size = + instr->GetSVEPermuteIndexAndLaneSizeLog2(); + int index = index_and_lane_size.first; + int lane_size_in_bytes_log_2 = index_and_lane_size.second; + VectorFormat vform = + SVEFormatFromLaneSizeInBytesLog2(lane_size_in_bytes_log_2); + if ((index < 0) || (index >= LaneCountFromFormat(vform))) { + // Out of bounds, set the destination register to zero. + dup_immediate(kFormatVnD, zd, 0); + } else { + dup_element(vform, zd, ReadVRegister(instr->GetRn()), index); + } + return; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEReverseVectorElements(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + VectorFormat vform = instr->GetSVEVectorFormat(); + switch (instr->Mask(SVEReverseVectorElementsMask)) { + case REV_z_z: + rev(vform, zd, ReadVRegister(instr->GetRn())); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEUnpackVectorElements(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + VectorFormat vform = instr->GetSVEVectorFormat(); + switch (instr->Mask(SVEUnpackVectorElementsMask)) { + case SUNPKHI_z_z: + unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kSignedExtend); + break; + case SUNPKLO_z_z: + unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kSignedExtend); + break; + case UUNPKHI_z_z: + unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kUnsignedExtend); + break; + case UUNPKLO_z_z: + unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kUnsignedExtend); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVETableLookup(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + switch (instr->Mask(SVETableLookupMask)) { + case TBL_z_zz_1: + Table(instr->GetSVEVectorFormat(), + zd, + ReadVRegister(instr->GetRn()), + ReadVRegister(instr->GetRm())); + return; + default: + break; + } +} + +void Simulator::VisitSVEPredicateCount(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + + switch (instr->Mask(SVEPredicateCountMask)) { + case CNTP_r_p_p: { + WriteXRegister(instr->GetRd(), CountActiveAndTrueLanes(vform, pg, pn)); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPredicateLogical(const Instruction* instr) { + Instr op = instr->Mask(SVEPredicateLogicalMask); + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + SimPRegister& pm = ReadPRegister(instr->GetPm()); + SimPRegister result; + switch (op) { + case ANDS_p_p_pp_z: + case AND_p_p_pp_z: + case BICS_p_p_pp_z: + case BIC_p_p_pp_z: + case EORS_p_p_pp_z: + case EOR_p_p_pp_z: + case NANDS_p_p_pp_z: + case NAND_p_p_pp_z: + case NORS_p_p_pp_z: + case NOR_p_p_pp_z: + case ORNS_p_p_pp_z: + case ORN_p_p_pp_z: + case ORRS_p_p_pp_z: + case ORR_p_p_pp_z: + SVEPredicateLogicalHelper(static_cast<SVEPredicateLogicalOp>(op), + result, + pn, + pm); + break; + case SEL_p_p_pp: + sel(pd, pg, pn, pm); + return; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + mov_zeroing(pd, pg, result); + if (instr->Mask(SVEPredicateLogicalSetFlagsBit) != 0) { + PredTest(kFormatVnB, pg, pd); + } 
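// Editorial note (not part of the patch): PredTest implements the SVE
// flag-setting convention, sketched here for reference: N takes the value of
// the first active result bit, Z is set if no active result bit is set, C is
// set if the last active result bit is clear, and V is zero. For example,
// with lanes 0 and 2 active in pg and a result where only lane 0 is set:
// N = 1, Z = 0, C = 1, V = 0.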
+} + +void Simulator::VisitSVEPredicateFirstActive(const Instruction* instr) { + LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5)); + LogicPRegister pdn = ReadPRegister(instr->GetPd()); + switch (instr->Mask(SVEPredicateFirstActiveMask)) { + case PFIRST_p_p_p: + pfirst(pdn, pg, pdn); + // TODO: Is this broken when pg == pdn? + PredTest(kFormatVnB, pg, pdn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPredicateInitialize(const Instruction* instr) { + // This group only contains PTRUE{S}, and there are no unallocated encodings. + VIXL_STATIC_ASSERT( + SVEPredicateInitializeMask == + (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit)); + VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) || + (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s)); + + LogicPRegister pdn = ReadPRegister(instr->GetPd()); + VectorFormat vform = instr->GetSVEVectorFormat(); + + ptrue(vform, pdn, instr->GetImmSVEPredicateConstraint()); + if (instr->ExtractBit(16)) PredTest(vform, pdn, pdn); +} + +void Simulator::VisitSVEPredicateNextActive(const Instruction* instr) { + // This group only contains PNEXT, and there are no unallocated encodings. + VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask); + VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p); + + LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5)); + LogicPRegister pdn = ReadPRegister(instr->GetPd()); + VectorFormat vform = instr->GetSVEVectorFormat(); + + pnext(vform, pdn, pg, pdn); + // TODO: Is this broken when pg == pdn? + PredTest(vform, pg, pdn); +} + +void Simulator::VisitSVEPredicateReadFromFFR_Predicated( + const Instruction* instr) { + LogicPRegister pd(ReadPRegister(instr->GetPd())); + LogicPRegister pg(ReadPRegister(instr->GetPn())); + FlagsUpdate flags = LeaveFlags; + switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) { + case RDFFR_p_p_f: + // Do nothing. 
+ break; + case RDFFRS_p_p_f: + flags = SetFlags; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + LogicPRegister ffr(ReadFFR()); + mov_zeroing(pd, pg, ffr); + + if (flags == SetFlags) { + PredTest(kFormatVnB, pg, pd); + } +} + +void Simulator::VisitSVEPredicateReadFromFFR_Unpredicated( + const Instruction* instr) { + LogicPRegister pd(ReadPRegister(instr->GetPd())); + LogicPRegister ffr(ReadFFR()); + switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) { + case RDFFR_p_f: + mov(pd, ffr); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPredicateTest(const Instruction* instr) { + switch (instr->Mask(SVEPredicateTestMask)) { + case PTEST_p_p: + PredTest(kFormatVnB, + ReadPRegister(instr->ExtractBits(13, 10)), + ReadPRegister(instr->GetPn())); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPredicateZero(const Instruction* instr) { + switch (instr->Mask(SVEPredicateZeroMask)) { + case PFALSE_p: + pfalse(ReadPRegister(instr->GetPd())); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPropagateBreak(const Instruction* instr) { + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + SimPRegister& pm = ReadPRegister(instr->GetPm()); + + bool set_flags = false; + switch (instr->Mask(SVEPropagateBreakMask)) { + case BRKPAS_p_p_pp: + set_flags = true; + VIXL_FALLTHROUGH(); + case BRKPA_p_p_pp: + brkpa(pd, pg, pn, pm); + break; + case BRKPBS_p_p_pp: + set_flags = true; + VIXL_FALLTHROUGH(); + case BRKPB_p_p_pp: + brkpb(pd, pg, pn, pm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (set_flags) { + PredTest(kFormatVnB, pg, pd); + } +} + +void Simulator::VisitSVEStackFrameAdjustment(const Instruction* instr) { + uint64_t length = 0; + switch (instr->Mask(SVEStackFrameAdjustmentMask)) { + case ADDPL_r_ri: + length = GetPredicateLengthInBytes(); + break; + case ADDVL_r_ri: + length = GetVectorLengthInBytes(); + break; + default: + VIXL_UNIMPLEMENTED(); + } + uint64_t base = ReadXRegister(instr->GetRm(), Reg31IsStackPointer); + WriteXRegister(instr->GetRd(), + base + (length * instr->GetImmSVEVLScale()), + LogRegWrites, + Reg31IsStackPointer); +} + +void Simulator::VisitSVEStackFrameSize(const Instruction* instr) { + int64_t scale = instr->GetImmSVEVLScale(); + + switch (instr->Mask(SVEStackFrameSizeMask)) { + case RDVL_r_i: + WriteXRegister(instr->GetRd(), GetVectorLengthInBytes() * scale); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::VisitSVEVectorSelect(const Instruction* instr) { + // The only instruction in this group is `sel`, and there are no unused + // encodings. 
+ VIXL_ASSERT(instr->Mask(SVEVectorSelectMask) == SEL_z_p_zz); + + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + sel(vform, zd, pg, zn, zm); +} + +void Simulator::VisitSVEFFRInitialise(const Instruction* instr) { + switch (instr->Mask(SVEFFRInitialiseMask)) { + case SETFFR_f: { + LogicPRegister ffr(ReadFFR()); + ffr.SetAllBits(); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEFFRWriteFromPredicate(const Instruction* instr) { + switch (instr->Mask(SVEFFRWriteFromPredicateMask)) { + case WRFFR_f_p: { + SimPRegister pn(ReadPRegister(instr->GetPn())); + bool last_active = true; + for (unsigned i = 0; i < pn.GetSizeInBits(); i++) { + bool active = pn.GetBit(i); + if (active && !last_active) { + // `pn` is non-monotonic. This is UNPREDICTABLE. + VIXL_ABORT(); + } + last_active = active; + } + mov(ReadFFR(), pn); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEContiguousLoad_ScalarPlusImm(const Instruction* instr) { + bool is_signed; + switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) { + case LD1B_z_p_bi_u8: + case LD1B_z_p_bi_u16: + case LD1B_z_p_bi_u32: + case LD1B_z_p_bi_u64: + case LD1H_z_p_bi_u16: + case LD1H_z_p_bi_u32: + case LD1H_z_p_bi_u64: + case LD1W_z_p_bi_u32: + case LD1W_z_p_bi_u64: + case LD1D_z_p_bi_u64: + is_signed = false; + break; + case LD1SB_z_p_bi_s16: + case LD1SB_z_p_bi_s32: + case LD1SB_z_p_bi_s64: + case LD1SH_z_p_bi_s32: + case LD1SH_z_p_bi_s64: + case LD1SW_z_p_bi_s64: + is_signed = true; + break; + default: + // This encoding group is complete, so no other values should be possible. + VIXL_UNREACHABLE(); + is_signed = false; + break; + } + + int vl = GetVectorLengthInBytes(); + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); + VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); + int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2; + uint64_t offset = + (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + is_signed); +} + +void Simulator::VisitSVEContiguousLoad_ScalarPlusScalar( + const Instruction* instr) { + bool is_signed; + switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) { + case LD1B_z_p_br_u8: + case LD1B_z_p_br_u16: + case LD1B_z_p_br_u32: + case LD1B_z_p_br_u64: + case LD1H_z_p_br_u16: + case LD1H_z_p_br_u32: + case LD1H_z_p_br_u64: + case LD1W_z_p_br_u32: + case LD1W_z_p_br_u64: + case LD1D_z_p_br_u64: + is_signed = false; + break; + case LD1SB_z_p_br_s16: + case LD1SB_z_p_br_s32: + case LD1SB_z_p_br_s64: + case LD1SH_z_p_br_s32: + case LD1SH_z_p_br_s64: + case LD1SW_z_p_br_s64: + is_signed = true; + break; + default: + // This encoding group is complete, so no other values should be possible. 
+ VIXL_UNREACHABLE(); + is_signed = false; + break; + } + + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); + VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + uint64_t offset = ReadXRegister(instr->GetRm()); + offset <<= msize_in_bytes_log2; + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + is_signed); +} void Simulator::DoUnreachable(const Instruction* instr) { VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) && diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h index 7cb7419a..1a89dff7 100644 --- a/src/aarch64/simulator-aarch64.h +++ b/src/aarch64/simulator-aarch64.h @@ -37,7 +37,6 @@ #include "cpu-features-auditor-aarch64.h" #include "disasm-aarch64.h" #include "instructions-aarch64.h" -#include "instrument-aarch64.h" #include "simulator-constants-aarch64.h" #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 @@ -77,6 +76,22 @@ class Memory { return value; } + template <typename A> + static uint64_t Read(int size_in_bytes, A address) { + switch (size_in_bytes) { + case 1: + return Read<uint8_t>(address); + case 2: + return Read<uint16_t>(address); + case 4: + return Read<uint32_t>(address); + case 8: + return Read<uint64_t>(address); + } + VIXL_UNREACHABLE(); + return 0; + } + template <typename T, typename A> static void Write(A address, T value) { address = AddressUntag(address); @@ -87,19 +102,33 @@ class Memory { } }; -// Represent a register (r0-r31, v0-v31). -template <int kSizeInBytes> +// Represent a register (r0-r31, v0-v31, z0-z31, p0-p15). +template <unsigned kMaxSizeInBits> class SimRegisterBase { public: - SimRegisterBase() : written_since_last_log_(false) {} + static const unsigned kMaxSizeInBytes = kMaxSizeInBits / kBitsPerByte; + VIXL_STATIC_ASSERT((kMaxSizeInBytes * kBitsPerByte) == kMaxSizeInBits); + + SimRegisterBase() : size_in_bytes_(kMaxSizeInBytes) { Clear(); } + + unsigned GetSizeInBits() const { return size_in_bytes_ * kBitsPerByte; } + unsigned GetSizeInBytes() const { return size_in_bytes_; } + + void SetSizeInBytes(unsigned size_in_bytes) { + VIXL_ASSERT(size_in_bytes <= kMaxSizeInBytes); + size_in_bytes_ = size_in_bytes; + } + void SetSizeInBits(unsigned size_in_bits) { + VIXL_ASSERT(size_in_bits <= kMaxSizeInBits); + VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0); + SetSizeInBytes(size_in_bits / kBitsPerByte); + } // Write the specified value. The value is zero-extended if necessary. template <typename T> void Write(T new_value) { - if (sizeof(new_value) < kSizeInBytes) { - // All AArch64 registers are zero-extending. - memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value)); - } + // All AArch64 registers are zero-extending. + if (sizeof(new_value) < GetSizeInBytes()) Clear(); WriteLane(new_value, 0); NotifyRegisterWrite(); } @@ -108,6 +137,11 @@ class SimRegisterBase { Write(new_value); } + void Clear() { + memset(value_, 0, kMaxSizeInBytes); + NotifyRegisterWrite(); + } + // Insert a typed value into a register, leaving the rest of the register // unchanged. 
The lane parameter indicates where in the register the value
  // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where
@@ -137,6 +171,17 @@ class SimRegisterBase {
     return GetLane(lane);
   }
 
+  // Get the value of a specific bit, indexed from the least-significant bit of
+  // lane 0.
+  bool GetBit(int bit) const {
+    int bit_in_byte = bit % (sizeof(value_[0]) * kBitsPerByte);
+    int byte = bit / (sizeof(value_[0]) * kBitsPerByte);
+    return ((value_[byte] >> bit_in_byte) & 1) != 0;
+  }
+
+  // Return a pointer to the raw, underlying byte array.
+  const uint8_t* GetBytes() const { return value_; }
+
   // TODO: Make this return a map of updated bytes, so that we can highlight
   // updated lanes for load-and-insert. (That never happens for scalar code, but
   // NEON has some instructions that can update individual lanes.)
@@ -145,7 +190,9 @@
   void NotifyRegisterLogged() { written_since_last_log_ = false; }
 
  protected:
-  uint8_t value_[kSizeInBytes];
+  uint8_t value_[kMaxSizeInBytes];
+
+  unsigned size_in_bytes_;
 
   // Helpers to aid with register tracing.
   bool written_since_last_log_;
@@ -156,38 +203,152 @@
   template <typename T>
   void ReadLane(T* dst, int lane) const {
     VIXL_ASSERT(lane >= 0);
-    VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= kSizeInBytes);
+    VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= GetSizeInBytes());
     memcpy(dst, &value_[lane * sizeof(*dst)], sizeof(*dst));
   }
 
   template <typename T>
   void WriteLane(T src, int lane) {
     VIXL_ASSERT(lane >= 0);
-    VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= kSizeInBytes);
+    VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= GetSizeInBytes());
     memcpy(&value_[lane * sizeof(src)], &src, sizeof(src));
   }
+
+  // The default ReadLane and WriteLane methods assume that what we are copying
+  // is "trivially copyable", and use memcpy. We have to provide alternative
+  // implementations for SimFloat16, which cannot be copied this way.
+
+  void ReadLane(vixl::internal::SimFloat16* dst, int lane) const {
+    uint16_t rawbits;
+    ReadLane(&rawbits, lane);
+    *dst = RawbitsToFloat16(rawbits);
+  }
+
+  void WriteLane(vixl::internal::SimFloat16 src, int lane) {
+    WriteLane(Float16ToRawbits(src), lane);
+  }
+};
+
+typedef SimRegisterBase<kXRegSize> SimRegister;      // r0-r31
+typedef SimRegisterBase<kPRegMaxSize> SimPRegister;  // p0-p15
+// FFR has the same format as a predicate register.
+typedef SimPRegister SimFFRRegister;
+
+// v0-v31 and z0-z31
+class SimVRegister : public SimRegisterBase<kZRegMaxSize> {
+ public:
+  SimVRegister() : SimRegisterBase<kZRegMaxSize>(), accessed_as_z_(false) {}
+
+  void NotifyAccessAsZ() { accessed_as_z_ = true; }
+
+  void NotifyRegisterLogged() {
+    SimRegisterBase<kZRegMaxSize>::NotifyRegisterLogged();
+    accessed_as_z_ = false;
+  }
+
+  bool AccessedAsZSinceLastLog() const { return accessed_as_z_; }
+
+ private:
+  bool accessed_as_z_;
+};
+
+// Representation of an SVE predicate register.
+class LogicPRegister {
+ public:
+  inline LogicPRegister(
+      SimPRegister& other)  // NOLINT(runtime/references)(runtime/explicit)
+      : register_(other) {}
+
+  // Use a conveniently-sized 16-bit chunk: the minimum predicate length is
+  // 16 bits, and predicate lengths can only grow in multiples of 16 bits.
+  typedef uint16_t ChunkType;
+
+  // Assign a bit at the low end of the specified lane.
+  // The bit is zero-extended if necessary.
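// Editorial example (not part of the patch): with vform = kFormatVnS, each
// lane owns psize = 4 predicate bits, so lane_index 3 maps to bit_index 12,
// i.e. byte_index 1 and bit_offset 4. Only the lowest of the lane's four bits
// carries the active flag; ZeroExtend() (below) clears the other three.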
+  void SetActive(VectorFormat vform, int lane_index, bool value) {
+    int psize = LaneSizeInBytesFromFormat(vform);
+    int bit_index = lane_index * psize;
+    int byte_index = bit_index / kBitsPerByte;
+    int bit_offset = bit_index % kBitsPerByte;
+    uint8_t byte = register_.GetLane<uint8_t>(byte_index);
+    register_.Insert(byte_index, ZeroExtend(byte, bit_offset, psize, value));
+  }
+
+  bool IsActive(VectorFormat vform, int lane_index) const {
+    int psize = LaneSizeInBytesFromFormat(vform);
+    int bit_index = lane_index * psize;
+    int byte_index = bit_index / kBitsPerByte;
+    int bit_offset = bit_index % kBitsPerByte;
+    uint8_t byte = register_.GetLane<uint8_t>(byte_index);
+    return ExtractBit(byte, bit_offset);
+  }
+
+  // The accessors for bulk processing.
+  int GetChunkCount() const {
+    VIXL_ASSERT((register_.GetSizeInBytes() % sizeof(ChunkType)) == 0);
+    return register_.GetSizeInBytes() / sizeof(ChunkType);
+  }
+
+  ChunkType GetChunk(int lane) const { return GetActiveMask<ChunkType>(lane); }
+
+  void SetChunk(int lane, ChunkType new_value) {
+    SetActiveMask(lane, new_value);
+  }
+
+  void SetAllBits() {
+    int chunk_size = sizeof(ChunkType) * kBitsPerByte;
+    ChunkType bits = GetUintMask(chunk_size);
+    for (int lane = 0;
+         lane < (static_cast<int>(register_.GetSizeInBits() / chunk_size));
+         lane++) {
+      SetChunk(lane, bits);
+    }
+  }
+
+  template <typename T>
+  T GetActiveMask(int lane) const {
+    return register_.GetLane<T>(lane);
+  }
+
+  template <typename T>
+  void SetActiveMask(int lane, T new_value) {
+    register_.Insert<T>(lane, new_value);
+  }
+
+  void Clear() { register_.Clear(); }
+
+  bool Aliases(const LogicPRegister& other) const {
+    return &register_ == &other.register_;
+  }
+
+ private:
+  // The bit assignment is zero-extended to fill the size of the predicate
+  // element.
+  uint8_t ZeroExtend(uint8_t byte, int index, int psize, bool value) {
+    VIXL_ASSERT(index >= 0);
+    VIXL_ASSERT(index + psize <= kBitsPerByte);
+    int bits = value ? 1 : 0;
+    switch (psize) {
+      case 1:
+        AssignBit(byte, index, bits);
+        break;
+      case 2:
+        AssignBits(byte, index, 0x03, bits);
+        break;
+      case 4:
+        AssignBits(byte, index, 0x0f, bits);
+        break;
+      case 8:
+        AssignBits(byte, index, 0xff, bits);
+        break;
+      default:
+        VIXL_UNREACHABLE();
+        return 0;
    }
+    return byte;
+  }
+
+  SimPRegister& register_;
+};
 
-typedef SimRegisterBase<kXRegSizeInBytes> SimRegister;   // r0-r31
-typedef SimRegisterBase<kQRegSizeInBytes> SimVRegister;  // v0-v31
-
-// The default ReadLane and WriteLane methods assume what we are copying is
-// "trivially copyable" by using memcpy. We have to provide alternative
-// implementations for SimFloat16 which cannot be copied this way.
-
-template <>
-template <>
-inline void SimVRegister::ReadLane(vixl::internal::SimFloat16* dst,
-                                   int lane) const {
-  uint16_t rawbits;
-  ReadLane(&rawbits, lane);
-  *dst = RawbitsToFloat16(rawbits);
-}
-
-template <>
-template <>
-inline void SimVRegister::WriteLane(vixl::internal::SimFloat16 src, int lane) {
-  WriteLane(Float16ToRawbits(src), lane);
-}
 
 // Representation of a vector register, with typed getters and setters for lanes
 // and additional information to represent lane state.
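Stepping back from the diff for a moment: the predicate layout used by
LogicPRegister above (one predicate bit per byte of lane size, with the active
flag in the lowest bit of each lane's field) can be demonstrated in isolation.
A self-contained editorial sketch, using only the standard library; the helper
name is ours, not VIXL's:

    #include <cassert>
    #include <cstdint>

    // Mirror LogicPRegister::IsActive(): one predicate bit per byte of lane
    // size, with the active flag in the lowest bit of each lane's field.
    static bool IsLaneActive(const uint8_t* pred,
                             int lane_size_in_bytes,
                             int lane_index) {
      int bit_index = lane_index * lane_size_in_bytes;
      return ((pred[bit_index / 8] >> (bit_index % 8)) & 1) != 0;
    }

    int main() {
      // A 32-bit predicate governing the 32-bit (S) lanes of a 256-bit vector.
      uint8_t pred[4] = {0x11, 0x00, 0x01, 0x00};
      assert(IsLaneActive(pred, 4, 0));   // Bit 0 of byte 0.
      assert(IsLaneActive(pred, 4, 1));   // Bit 4 of byte 0.
      assert(!IsLaneActive(pred, 4, 2));  // Bit 0 of byte 1.
      assert(IsLaneActive(pred, 4, 4));   // Bit 0 of byte 2.
      return 0;
    }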
@@ -205,6 +366,7 @@ class LogicVRegister { } int64_t Int(VectorFormat vform, int index) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); int64_t element; switch (LaneSizeInBitsFromFormat(vform)) { case 8: @@ -227,6 +389,7 @@ class LogicVRegister { } uint64_t Uint(VectorFormat vform, int index) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); uint64_t element; switch (LaneSizeInBitsFromFormat(vform)) { case 8: @@ -260,6 +423,7 @@ class LogicVRegister { } void SetInt(VectorFormat vform, int index, int64_t value) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); switch (LaneSizeInBitsFromFormat(vform)) { case 8: register_.Insert(index, static_cast<int8_t>(value)); @@ -287,6 +451,7 @@ class LogicVRegister { } void SetUint(VectorFormat vform, int index, uint64_t value) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); switch (LaneSizeInBitsFromFormat(vform)) { case 8: register_.Insert(index, static_cast<uint8_t>(value)); @@ -313,7 +478,98 @@ class LogicVRegister { } } + void ReadIntFromMem(VectorFormat vform, + unsigned msize_in_bits, + int index, + uint64_t addr) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); + int64_t value; + switch (msize_in_bits) { + case 8: + value = Memory::Read<int8_t>(addr); + break; + case 16: + value = Memory::Read<int16_t>(addr); + break; + case 32: + value = Memory::Read<int32_t>(addr); + break; + case 64: + value = Memory::Read<int64_t>(addr); + break; + default: + VIXL_UNREACHABLE(); + return; + } + + unsigned esize_in_bits = LaneSizeInBitsFromFormat(vform); + VIXL_ASSERT(esize_in_bits >= msize_in_bits); + switch (esize_in_bits) { + case 8: + register_.Insert(index, static_cast<int8_t>(value)); + break; + case 16: + register_.Insert(index, static_cast<int16_t>(value)); + break; + case 32: + register_.Insert(index, static_cast<int32_t>(value)); + break; + case 64: + register_.Insert(index, static_cast<int64_t>(value)); + break; + default: + VIXL_UNREACHABLE(); + return; + } + } + + void ReadUintFromMem(VectorFormat vform, + unsigned msize_in_bits, + int index, + uint64_t addr) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); + uint64_t value; + switch (msize_in_bits) { + case 8: + value = Memory::Read<uint8_t>(addr); + break; + case 16: + value = Memory::Read<uint16_t>(addr); + break; + case 32: + value = Memory::Read<uint32_t>(addr); + break; + case 64: + value = Memory::Read<uint64_t>(addr); + break; + default: + VIXL_UNREACHABLE(); + return; + } + + unsigned esize_in_bits = LaneSizeInBitsFromFormat(vform); + VIXL_ASSERT(esize_in_bits >= msize_in_bits); + switch (esize_in_bits) { + case 8: + register_.Insert(index, static_cast<uint8_t>(value)); + break; + case 16: + register_.Insert(index, static_cast<uint16_t>(value)); + break; + case 32: + register_.Insert(index, static_cast<uint32_t>(value)); + break; + case 64: + register_.Insert(index, static_cast<uint64_t>(value)); + break; + default: + VIXL_UNREACHABLE(); + return; + } + } + void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); switch (LaneSizeInBitsFromFormat(vform)) { case 8: register_.Insert(index, Memory::Read<uint8_t>(addr)); @@ -334,6 +590,7 @@ class LogicVRegister { } void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); uint64_t value = Uint(vform, index); switch (LaneSizeInBitsFromFormat(vform)) { case 8: @@ -361,11 +618,20 @@ class LogicVRegister { 
register_.Insert(index, value); } - // When setting a result in a register of size less than Q, the top bits of - // the Q register must be cleared. + template <typename T> + void SetFloat(VectorFormat vform, int index, T value) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); + register_.Insert(index, value); + } + + // When setting a result in a register larger than the result itself, the top + // bits of the register must be cleared. void ClearForWrite(VectorFormat vform) const { + // SVE destinations write whole registers, so we have nothing to clear. + if (IsSVEFormat(vform)) return; + unsigned size = RegisterSizeInBytesFromFormat(vform); - for (unsigned i = size; i < kQRegSizeInBytes; i++) { + for (unsigned i = size; i < register_.GetSizeInBytes(); i++) { SetUint(kFormat16B, i, 0); } } @@ -481,15 +747,129 @@ class LogicVRegister { return *this; } + int LaneCountFromFormat(VectorFormat vform) const { + if (IsSVEFormat(vform)) { + return register_.GetSizeInBits() / LaneSizeInBitsFromFormat(vform); + } else { + return vixl::aarch64::LaneCountFromFormat(vform); + } + } + private: SimVRegister& register_; // Allocate one saturation state entry per lane; largest register is type Q, // and lanes can be a minimum of one byte wide. - Saturation saturated_[kQRegSizeInBytes]; + Saturation saturated_[kZRegMaxSizeInBytes]; // Allocate one rounding state entry per lane. - bool round_[kQRegSizeInBytes]; + bool round_[kZRegMaxSizeInBytes]; +}; + +// Represent an SVE addressing mode and abstract per-lane address generation to +// make iteration easy. +// +// Contiguous accesses are described with a simple base address, the memory +// occupied by each lane (`SetMsizeInBytesLog2()`) and the number of elements in +// each struct (`SetRegCount()`). +// +// Scatter-gather accesses also require a SimVRegister and information about how +// to extract lanes from it. +class LogicSVEAddressVector { + public: + // scalar-plus-scalar + // scalar-plus-immediate + explicit LogicSVEAddressVector(uint64_t base) + : base_(base), + msize_in_bytes_log2_(kUnknownMsizeInBytesLog2), + reg_count_(1), + vector_(NULL), + vector_form_(kFormatUndefined), + vector_mod_(NO_SVE_OFFSET_MODIFIER), + vector_shift_(0) {} + + // scalar-plus-vector + // vector-plus-immediate + // `base` should be the constant used for each element. That is, the value + // of `xn`, or `#<imm>`. + // `vector` should be the SimVRegister with offsets for each element. The + // vector format must be specified; SVE scatter/gather accesses typically + // support both 32-bit and 64-bit addressing. + // + // `mod` and `shift` correspond to the modifiers applied to each element in + // scalar-plus-vector forms, such as those used for unpacking and + // sign-extension. They are not used for vector-plus-immediate. + LogicSVEAddressVector(uint64_t base, + const SimVRegister* vector, + VectorFormat vform, + SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER, + int shift = 0) + : base_(base), + msize_in_bytes_log2_(kUnknownMsizeInBytesLog2), + reg_count_(1), + vector_(vector), + vector_form_(vform), + vector_mod_(mod), + vector_shift_(shift) {} + + // Set `msize` -- the memory occupied by each lane -- for address + // calculations. 
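// Editorial example (not part of the patch): for a contiguous ld2h at base
// 0x1000, msize is 2 bytes and the register count is 2, so each structure
// occupies 4 bytes. Assuming the contiguous GetStructAddress() implementation
// (defined out of line) returns base + (lane * reg_count * msize), lane 3 of
// the second register is accessed at 0x1000 + (3 * 4) + (1 * 2) = 0x100e, via
// GetElementAddress() below.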
+ void SetMsizeInBytesLog2(int msize_in_bytes_log2) { + VIXL_ASSERT(msize_in_bytes_log2 >= static_cast<int>(kBRegSizeInBytesLog2)); + VIXL_ASSERT(msize_in_bytes_log2 <= static_cast<int>(kDRegSizeInBytesLog2)); + msize_in_bytes_log2_ = msize_in_bytes_log2; + } + + bool HasMsize() const { + return msize_in_bytes_log2_ != kUnknownMsizeInBytesLog2; + } + + int GetMsizeInBytesLog2() const { + VIXL_ASSERT(HasMsize()); + return msize_in_bytes_log2_; + } + int GetMsizeInBitsLog2() const { + return GetMsizeInBytesLog2() + kBitsPerByteLog2; + } + + int GetMsizeInBytes() const { return 1 << GetMsizeInBytesLog2(); } + int GetMsizeInBits() const { return 1 << GetMsizeInBitsLog2(); } + + void SetRegCount(int reg_count) { + VIXL_ASSERT(reg_count >= 1); // E.g. ld1/st1 + VIXL_ASSERT(reg_count <= 4); // E.g. ld4/st4 + reg_count_ = reg_count; + } + + int GetRegCount() const { return reg_count_; } + + // Full per-element address calculation for structured accesses. + // + // Note that the register number argument (`reg`) is zero-based. + uint64_t GetElementAddress(int lane, int reg) const { + VIXL_ASSERT(reg < GetRegCount()); + // Individual structures are always contiguous in memory, so this + // implementation works for both contiguous and scatter-gather addressing. + return GetStructAddress(lane) + (reg * GetMsizeInBytes()); + } + + // Full per-struct address calculation for structured accesses. + uint64_t GetStructAddress(int lane) const; + + bool IsContiguous() const { return vector_ == NULL; } + bool IsScatterGather() const { return !IsContiguous(); } + + private: + uint64_t base_; + int msize_in_bytes_log2_; + int reg_count_; + + const SimVRegister* vector_; + VectorFormat vector_form_; + SVEOffsetModifier vector_mod_; + int vector_shift_; + + static const int kUnknownMsizeInBytesLog2 = -1; }; // The proper way to initialize a simulated system register (such as NZCV) is as @@ -733,6 +1113,11 @@ class Simulator : public DecoderVisitor { VIXL_ASSERT(IsWordAligned(pc_)); pc_modified_ = false; + if (movprfx_ != NULL) { + VIXL_CHECK(pc_->CanTakeSVEMovprfx(movprfx_)); + movprfx_ = NULL; + } + // On guarded pages, if BType is not zero, take an exception on any // instruction other than BTI, PACI[AB]SP, HLT or BRK. if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) { @@ -774,13 +1159,6 @@ class Simulator : public DecoderVisitor { #undef DECLARE -#define DECLARE(A) \ - VIXL_NO_RETURN_IN_DEBUG_MODE virtual void Visit##A(const Instruction* instr) \ - VIXL_OVERRIDE; - VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(DECLARE) -#undef DECLARE - - // Integer register accessors. // Basic accessor: Read the register as the specified type. @@ -827,6 +1205,13 @@ class Simulator : public DecoderVisitor { return ReadXRegister(code, r31mode); } + SimPRegister& ReadPRegister(unsigned code) { + VIXL_ASSERT(code < kNumberOfPRegisters); + return pregisters_[code]; + } + + SimFFRRegister& ReadFFR() { return ffr_register_; } + // As above, with parameterized size and return type. The value is // either zero-extended or truncated to fit, as required. template <typename T> @@ -877,6 +1262,10 @@ class Simulator : public DecoderVisitor { // Write 'value' into an integer register. The value is zero-extended. This // behaviour matches AArch64 register writes. 
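// Editorial example (not part of the patch): writing a 32-bit value
// zero-extends into the X register, so WriteRegister(0, UINT32_C(0xffffffff))
// leaves x0 holding 0x00000000ffffffff, matching `mov w0, #-1` on hardware.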
+ // + // SP may be specified in one of two ways: + // - (code == kSPRegInternalCode) && (r31mode == Reg31IsZeroRegister) + // - (code == 31) && (r31mode == Reg31IsStackPointer) template <typename T> void WriteRegister(unsigned code, T value, @@ -896,20 +1285,25 @@ class Simulator : public DecoderVisitor { VIXL_ASSERT((sizeof(T) == kWRegSizeInBytes) || (sizeof(T) == kXRegSizeInBytes)); VIXL_ASSERT( - code < kNumberOfRegisters || + (code < kNumberOfRegisters) || ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode))); - if ((code == 31) && (r31mode == Reg31IsZeroRegister)) { - return; - } - - if ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)) { - code = 31; + if (code == 31) { + if (r31mode == Reg31IsZeroRegister) { + // Discard writes to the zero register. + return; + } else { + code = kSPRegInternalCode; + } } - registers_[code].Write(value); + // registers_[31] is the stack pointer. + VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31); + registers_[code % kNumberOfRegisters].Write(value); - if (log_mode == LogRegWrites) LogRegister(code, r31mode); + if (log_mode == LogRegWrites) { + LogRegister(code, GetPrintRegisterFormatForSize(sizeof(T))); + } } template <typename T> VIXL_DEPRECATED("WriteRegister", @@ -1015,6 +1409,11 @@ class Simulator : public DecoderVisitor { uint8_t val[kQRegSizeInBytes]; }; + // A structure for representing a SVE Z register. + struct zreg_t { + uint8_t val[kZRegMaxSizeInBytes]; + }; + // Basic accessor: read the register as the specified type. template <typename T> T ReadVRegister(unsigned code) const { @@ -1130,7 +1529,8 @@ class Simulator : public DecoderVisitor { (sizeof(value) == kHRegSizeInBytes) || (sizeof(value) == kSRegSizeInBytes) || (sizeof(value) == kDRegSizeInBytes) || - (sizeof(value) == kQRegSizeInBytes)); + (sizeof(value) == kQRegSizeInBytes) || + (sizeof(value) == kZRegMaxSizeInBytes)); VIXL_ASSERT(code < kNumberOfVRegisters); vregisters_[code].Write(value); @@ -1237,6 +1637,12 @@ class Simulator : public DecoderVisitor { WriteQRegister(code, value, log_mode); } + void WriteZRegister(unsigned code, + zreg_t value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(code, value, log_mode); + } + template <typename T> T ReadRegister(Register reg) const { return ReadRegister<T>(reg.GetCode(), Reg31IsZeroRegister); @@ -1357,14 +1763,16 @@ class Simulator : public DecoderVisitor { kPrintRegLaneSizeD = 3 << 0, kPrintRegLaneSizeX = kPrintRegLaneSizeD, kPrintRegLaneSizeQ = 4 << 0, + kPrintRegLaneSizeUnknown = 5 << 0, kPrintRegLaneSizeOffset = 0, kPrintRegLaneSizeMask = 7 << 0, - // The lane count. + // The overall register size. kPrintRegAsScalar = 0, kPrintRegAsDVector = 1 << 3, kPrintRegAsQVector = 2 << 3, + kPrintRegAsSVEVector = 3 << 3, kPrintRegAsVectorMask = 3 << 3, @@ -1372,37 +1780,98 @@ class Simulator : public DecoderVisitor { // S-, H-, and D-sized lanes.) kPrintRegAsFP = 1 << 5, - // Supported combinations. 
-
-  kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar,
-  kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar,
-  kPrintHReg = kPrintRegLaneSizeH | kPrintRegAsScalar | kPrintRegAsFP,
-  kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
-  kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
-
-  kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar,
-  kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector,
-  kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector,
-  kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar,
-  kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector,
-  kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector,
-  kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar,
-  kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector,
-  kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector,
-  kPrintReg1HFP = kPrintRegLaneSizeH | kPrintRegAsScalar | kPrintRegAsFP,
-  kPrintReg4HFP = kPrintRegLaneSizeH | kPrintRegAsDVector | kPrintRegAsFP,
-  kPrintReg8HFP = kPrintRegLaneSizeH | kPrintRegAsQVector | kPrintRegAsFP,
-  kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
-  kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP,
-  kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP,
-  kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar,
-  kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector,
-  kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
-  kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | kPrintRegAsFP,
-  kPrintReg1Q = kPrintRegLaneSizeQ | kPrintRegAsScalar
+    // With this flag, print helpers won't check that the upper bits are zero.
+    // This also forces the register name to be printed with the `reg<msb:0>`
+    // format.
+    //
+    // The flag is supported with any PrintRegisterFormat other than those with
+    // kPrintRegAsSVEVector.
+    kPrintRegPartial = 1 << 6,
+
+// Supported combinations.
+// These exist so that they can be referred to by name, but also because
+// combining the flags above yields a plain integer, which C++ will not
+// implicitly convert back to the enum type; the combinations we want to use
+// therefore have to be enumerated explicitly.
+
+// Scalar formats.
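// Editorial note (not part of the patch): as an illustration of the macros
// below, VIXL_DECL_PRINT_REG_SCALAR(X) expands to
//   kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar,
//   kPrintXRegPartial = kPrintRegLaneSizeX | kPrintRegPartial
// and the _FP variant additionally declares kPrint<size>RegFP and
// kPrint<size>RegPartialFP combinations with kPrintRegAsFP set.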
+#define VIXL_DECL_PRINT_REG_SCALAR(size) \ + kPrint##size##Reg = kPrintRegLaneSize##size | kPrintRegAsScalar, \ + kPrint##size##RegPartial = kPrintRegLaneSize##size | kPrintRegPartial +#define VIXL_DECL_PRINT_REG_SCALAR_FP(size) \ + VIXL_DECL_PRINT_REG_SCALAR(size) \ + , kPrint##size##RegFP = kPrint##size##Reg | kPrintRegAsFP, \ + kPrint##size##RegPartialFP = kPrint##size##RegPartial | kPrintRegAsFP + VIXL_DECL_PRINT_REG_SCALAR(W), + VIXL_DECL_PRINT_REG_SCALAR(X), + VIXL_DECL_PRINT_REG_SCALAR_FP(H), + VIXL_DECL_PRINT_REG_SCALAR_FP(S), + VIXL_DECL_PRINT_REG_SCALAR_FP(D), + VIXL_DECL_PRINT_REG_SCALAR(Q), +#undef VIXL_DECL_PRINT_REG_SCALAR +#undef VIXL_DECL_PRINT_REG_SCALAR_FP + +#define VIXL_DECL_PRINT_REG_NEON(count, type, size) \ + kPrintReg##count##type = kPrintRegLaneSize##type | kPrintRegAs##size, \ + kPrintReg##count##type##Partial = kPrintReg##count##type | kPrintRegPartial +#define VIXL_DECL_PRINT_REG_NEON_FP(count, type, size) \ + VIXL_DECL_PRINT_REG_NEON(count, type, size) \ + , kPrintReg##count##type##FP = kPrintReg##count##type | kPrintRegAsFP, \ + kPrintReg##count##type##PartialFP = \ + kPrintReg##count##type##Partial | kPrintRegAsFP + VIXL_DECL_PRINT_REG_NEON(1, B, Scalar), + VIXL_DECL_PRINT_REG_NEON(8, B, DVector), + VIXL_DECL_PRINT_REG_NEON(16, B, QVector), + VIXL_DECL_PRINT_REG_NEON_FP(1, H, Scalar), + VIXL_DECL_PRINT_REG_NEON_FP(4, H, DVector), + VIXL_DECL_PRINT_REG_NEON_FP(8, H, QVector), + VIXL_DECL_PRINT_REG_NEON_FP(1, S, Scalar), + VIXL_DECL_PRINT_REG_NEON_FP(2, S, DVector), + VIXL_DECL_PRINT_REG_NEON_FP(4, S, QVector), + VIXL_DECL_PRINT_REG_NEON_FP(1, D, Scalar), + VIXL_DECL_PRINT_REG_NEON_FP(2, D, QVector), + VIXL_DECL_PRINT_REG_NEON(1, Q, Scalar), +#undef VIXL_DECL_PRINT_REG_NEON +#undef VIXL_DECL_PRINT_REG_NEON_FP + +#define VIXL_DECL_PRINT_REG_SVE(type) \ + kPrintRegVn##type = kPrintRegLaneSize##type | kPrintRegAsSVEVector, \ + kPrintRegVn##type##Partial = kPrintRegVn##type | kPrintRegPartial +#define VIXL_DECL_PRINT_REG_SVE_FP(type) \ + VIXL_DECL_PRINT_REG_SVE(type) \ + , kPrintRegVn##type##FP = kPrintRegVn##type | kPrintRegAsFP, \ + kPrintRegVn##type##PartialFP = kPrintRegVn##type##Partial | kPrintRegAsFP + VIXL_DECL_PRINT_REG_SVE(B), + VIXL_DECL_PRINT_REG_SVE_FP(H), + VIXL_DECL_PRINT_REG_SVE_FP(S), + VIXL_DECL_PRINT_REG_SVE_FP(D), + VIXL_DECL_PRINT_REG_SVE(Q) +#undef VIXL_DECL_PRINT_REG_SVE +#undef VIXL_DECL_PRINT_REG_SVE_FP }; + // Return `format` with the kPrintRegPartial flag set. + PrintRegisterFormat GetPrintRegPartial(PrintRegisterFormat format) { + // Every PrintRegisterFormat has a kPrintRegPartial counterpart, so the + // result of this cast will always be well-defined. + return static_cast<PrintRegisterFormat>(format | kPrintRegPartial); + } + + // For SVE formats, return the format of a Q register part of it. + PrintRegisterFormat GetPrintRegAsQChunkOfSVE(PrintRegisterFormat format) { + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + // Keep the FP and lane size fields. + int q_format = format & (kPrintRegLaneSizeMask | kPrintRegAsFP); + // The resulting format must always be partial, because we're not formatting + // the whole Z register. + q_format |= (kPrintRegAsQVector | kPrintRegPartial); + + // This cast is always safe because NEON QVector formats support every + // combination of FP and lane size that SVE formats do. 
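// Editorial example (not part of the patch): kPrintRegVnSFP keeps its lane
// size (S) and kPrintRegAsFP bits, gains kPrintRegAsQVector and
// kPrintRegPartial, and therefore comes out as kPrintReg4SPartialFP.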
+ return static_cast<PrintRegisterFormat>(q_format); + } + unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) { + VIXL_ASSERT((format & kPrintRegLaneSizeMask) != kPrintRegLaneSizeUnknown); return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset; } @@ -1411,17 +1880,51 @@ class Simulator : public DecoderVisitor { } unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) { - if (format & kPrintRegAsDVector) return kDRegSizeInBytesLog2; - if (format & kPrintRegAsQVector) return kQRegSizeInBytesLog2; - - // Scalar types. - return GetPrintRegLaneSizeInBytesLog2(format); + switch (format & kPrintRegAsVectorMask) { + case kPrintRegAsScalar: + return GetPrintRegLaneSizeInBytesLog2(format); + case kPrintRegAsDVector: + return kDRegSizeInBytesLog2; + case kPrintRegAsQVector: + return kQRegSizeInBytesLog2; + default: + case kPrintRegAsSVEVector: + // We print SVE vectors in Q-sized chunks. These need special handling, + // and it's probably an error to call this function in that case. + VIXL_UNREACHABLE(); + return kQRegSizeInBytesLog2; + } } unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) { return 1 << GetPrintRegSizeInBytesLog2(format); } + unsigned GetPrintRegSizeInBitsLog2(PrintRegisterFormat format) { + return GetPrintRegSizeInBytesLog2(format) + kBitsPerByteLog2; + } + + unsigned GetPrintRegSizeInBits(PrintRegisterFormat format) { + return 1 << GetPrintRegSizeInBitsLog2(format); + } + + const char* GetPartialRegSuffix(PrintRegisterFormat format) { + switch (GetPrintRegSizeInBitsLog2(format)) { + case kBRegSizeLog2: + return "<7:0>"; + case kHRegSizeLog2: + return "<15:0>"; + case kSRegSizeLog2: + return "<31:0>"; + case kDRegSizeLog2: + return "<63:0>"; + case kQRegSizeLog2: + return "<127:0>"; + } + VIXL_UNREACHABLE(); + return "<UNKNOWN>"; + } + unsigned GetPrintRegLaneCount(PrintRegisterFormat format) { unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format); unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format); @@ -1429,6 +1932,21 @@ class Simulator : public DecoderVisitor { return 1 << (reg_size_log2 - lane_size_log2); } + uint16_t GetPrintRegLaneMask(PrintRegisterFormat format) { + int print_as = format & kPrintRegAsVectorMask; + if (print_as == kPrintRegAsScalar) return 1; + + // Vector formats, including SVE formats printed in Q-sized chunks. + static const uint16_t masks[] = {0xffff, 0x5555, 0x1111, 0x0101, 0x0001}; + unsigned size_in_bytes_log2 = GetPrintRegLaneSizeInBytesLog2(format); + VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(masks)); + uint16_t mask = masks[size_in_bytes_log2]; + + // Exclude lanes that aren't visible in D vectors. + if (print_as == kPrintRegAsDVector) mask &= 0x00ff; + return mask; + } + PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size, unsigned lane_size); @@ -1459,6 +1977,10 @@ class Simulator : public DecoderVisitor { return format; } + PrintRegisterFormat GetPrintRegisterFormatForSizeTryFP(unsigned size) { + return GetPrintRegisterFormatTryFP(GetPrintRegisterFormatForSize(size)); + } + template <typename T> PrintRegisterFormat GetPrintRegisterFormat(T value) { return GetPrintRegisterFormatForSize(sizeof(value)); @@ -1485,99 +2007,314 @@ class Simulator : public DecoderVisitor { // Print all registers of the specified types. void PrintRegisters(); void PrintVRegisters(); + void PrintZRegisters(); void PrintSystemRegisters(); // As above, but only print the registers that have been updated. 
void PrintWrittenRegisters(); void PrintWrittenVRegisters(); + void PrintWrittenPRegisters(); // As above, but respect LOG_REG and LOG_VREG. void LogWrittenRegisters() { - if (GetTraceParameters() & LOG_REGS) PrintWrittenRegisters(); + if (ShouldTraceRegs()) PrintWrittenRegisters(); } void LogWrittenVRegisters() { - if (GetTraceParameters() & LOG_VREGS) PrintWrittenVRegisters(); + if (ShouldTraceVRegs()) PrintWrittenVRegisters(); + } + void LogWrittenPRegisters() { + if (ShouldTraceVRegs()) PrintWrittenPRegisters(); } void LogAllWrittenRegisters() { LogWrittenRegisters(); LogWrittenVRegisters(); + LogWrittenPRegisters(); + } + + // The amount of space to leave for a register name. This is used to keep the + // values vertically aligned. The longest register name has the form + // "z31<2047:1920>". The total overall value indentation must also take into + // account the fixed formatting: "# {name}: 0x{value}". + static const int kPrintRegisterNameFieldWidth = 14; + + // Print whole, individual register values. + // - The format can be used to restrict how much of the register is printed, + // but such formats indicate that the unprinted high-order bits are zero and + // these helpers will assert that. + // - If the format includes the kPrintRegAsFP flag then human-friendly FP + // value annotations will be printed. + // - The suffix can be used to add annotations (such as memory access + // details), or to suppress the newline. + void PrintRegister(int code, + PrintRegisterFormat format = kPrintXReg, + const char* suffix = "\n"); + void PrintVRegister(int code, + PrintRegisterFormat format = kPrintReg1Q, + const char* suffix = "\n"); + // PrintZRegister and PrintPRegister print over several lines, so they cannot + // allow the suffix to be overridden. + void PrintZRegister(int code, PrintRegisterFormat format = kPrintRegVnQ); + void PrintPRegister(int code, PrintRegisterFormat format = kPrintRegVnQ); + void PrintFFR(PrintRegisterFormat format = kPrintRegVnQ); + // Print a single Q-sized part of a Z register, or the corresponding two-byte + // part of a P register. These print single lines, and therefore allow the + // suffix to be overridden. The format must include the kPrintRegPartial flag. + void PrintPartialZRegister(int code, + int q_index, + PrintRegisterFormat format = kPrintRegVnQ, + const char* suffix = "\n"); + void PrintPartialPRegister(int code, + int q_index, + PrintRegisterFormat format = kPrintRegVnQ, + const char* suffix = "\n"); + void PrintPartialPRegister(const char* name, + const SimPRegister& reg, + int q_index, + PrintRegisterFormat format = kPrintRegVnQ, + const char* suffix = "\n"); + + // Like Print*Register (above), but respect trace parameters. + void LogRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceRegs()) PrintRegister(code, format); + } + void LogVRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintVRegister(code, format); + } + void LogZRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintZRegister(code, format); + } + void LogPRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintPRegister(code, format); + } + void LogFFR(PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintFFR(format); } - // Print individual register values (after update). 
- void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer); - void PrintVRegister(unsigned code, PrintRegisterFormat format); + // Other state updates, including system registers. void PrintSystemRegister(SystemRegister id); void PrintTakenBranch(const Instruction* target); + void LogSystemRegister(SystemRegister id) { + if (ShouldTraceSysRegs()) PrintSystemRegister(id); + } + void LogTakenBranch(const Instruction* target) { + if (ShouldTraceBranches()) PrintTakenBranch(target); + } - // Like Print* (above), but respect GetTraceParameters(). - void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) { - if (GetTraceParameters() & LOG_REGS) PrintRegister(code, r31mode); + // Trace memory accesses. + + // Common, contiguous register accesses (such as for scalars). + // The *Write variants automatically set kPrintRegPartial on the format. + void PrintRead(int rt_code, PrintRegisterFormat format, uintptr_t address); + void PrintExtendingRead(int rt_code, + PrintRegisterFormat format, + int access_size_in_bytes, + uintptr_t address); + void PrintWrite(int rt_code, PrintRegisterFormat format, uintptr_t address); + void PrintVRead(int rt_code, PrintRegisterFormat format, uintptr_t address); + void PrintVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address); + // Simple, unpredicated SVE accesses always access the whole vector, and never + // know the lane type, so there's no need to accept a `format`. + void PrintZRead(int rt_code, uintptr_t address) { + vregisters_[rt_code].NotifyRegisterLogged(); + PrintZAccess(rt_code, "<-", address); } - void LogVRegister(unsigned code, PrintRegisterFormat format) { - if (GetTraceParameters() & LOG_VREGS) PrintVRegister(code, format); + void PrintZWrite(int rt_code, uintptr_t address) { + PrintZAccess(rt_code, "->", address); } - void LogSystemRegister(SystemRegister id) { - if (GetTraceParameters() & LOG_SYSREGS) PrintSystemRegister(id); + void PrintPRead(int rt_code, uintptr_t address) { + pregisters_[rt_code].NotifyRegisterLogged(); + PrintPAccess(rt_code, "<-", address); + } + void PrintPWrite(int rt_code, uintptr_t address) { + PrintPAccess(rt_code, "->", address); } - void LogTakenBranch(const Instruction* target) { - if (GetTraceParameters() & LOG_BRANCH) PrintTakenBranch(target); - } - - // Print memory accesses. - void PrintRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format); - void PrintWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format); - void PrintVRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane); - void PrintVWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane); // Like Print* (above), but respect GetTraceParameters(). 
- void LogRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format) { - if (GetTraceParameters() & LOG_REGS) PrintRead(address, reg_code, format); - } - void LogWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format) { - if (GetTraceParameters() & LOG_WRITE) PrintWrite(address, reg_code, format); - } - void LogVRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane = 0) { - if (GetTraceParameters() & LOG_VREGS) { - PrintVRead(address, reg_code, format, lane); + void LogRead(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceRegs()) PrintRead(rt_code, format, address); + } + void LogExtendingRead(int rt_code, + PrintRegisterFormat format, + int access_size_in_bytes, + uintptr_t address) { + if (ShouldTraceRegs()) { + PrintExtendingRead(rt_code, format, access_size_in_bytes, address); } } - void LogVWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane = 0) { - if (GetTraceParameters() & LOG_WRITE) { - PrintVWrite(address, reg_code, format, lane); - } + void LogWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceWrites()) PrintWrite(rt_code, format, address); + } + void LogVRead(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceVRegs()) PrintVRead(rt_code, format, address); + } + void LogVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceWrites()) PrintVWrite(rt_code, format, address); + } + void LogZRead(int rt_code, uintptr_t address) { + if (ShouldTraceVRegs()) PrintZRead(rt_code, address); + } + void LogZWrite(int rt_code, uintptr_t address) { + if (ShouldTraceWrites()) PrintZWrite(rt_code, address); + } + void LogPRead(int rt_code, uintptr_t address) { + if (ShouldTraceVRegs()) PrintPRead(rt_code, address); + } + void LogPWrite(int rt_code, uintptr_t address) { + if (ShouldTraceWrites()) PrintPWrite(rt_code, address); + } + + // Helpers for the above, where the access operation is parameterised. + // - For loads, set op = "<-". + // - For stores, set op = "->". + void PrintAccess(int rt_code, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + void PrintVAccess(int rt_code, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + // Simple, unpredicated SVE accesses always access the whole vector, and never + // know the lane type, so these don't accept a `format`. + void PrintZAccess(int rt_code, const char* op, uintptr_t address); + void PrintPAccess(int rt_code, const char* op, uintptr_t address); + + // Multiple-structure accesses. + void PrintVStructAccess(int rt_code, + int reg_count, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + // Single-structure (single-lane) accesses. + void PrintVSingleStructAccess(int rt_code, + int reg_count, + int lane, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + // Replicating accesses. + void PrintVReplicatingStructAccess(int rt_code, + int reg_count, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + + // Multiple-structure accesses. + void PrintZStructAccess(int rt_code, + int reg_count, + const LogicPRegister& pg, + PrintRegisterFormat format, + int msize_in_bytes, + const char* op, + const LogicSVEAddressVector& addr); + + // Register-printing helper for all structured accessors. + // + // All lanes (according to `format`) are printed, but lanes indicated by + // `focus_mask` are of particular interest. 
Each bit corresponds to a byte in + // the printed register, in a manner similar to SVE's predicates. Currently, + // this is used to determine when to print human-readable FP annotations. + void PrintVRegistersForStructuredAccess(int rt_code, + int reg_count, + uint16_t focus_mask, + PrintRegisterFormat format); + + // As for the VRegister variant, but print partial Z register names. + void PrintZRegistersForStructuredAccess(int rt_code, + int q_index, + int reg_count, + uint16_t focus_mask, + PrintRegisterFormat format); + + // Print part of a memory access. This should be used for annotating + // non-trivial accesses, such as structured or sign-extending loads. Call + // Print*Register (or Print*RegistersForStructuredAccess), then + // PrintPartialAccess for each contiguous access that makes up the + // instruction. + // + // access_mask: + // The lanes to be printed. Each bit corresponds to a byte in the printed + // register, in a manner similar to SVE's predicates, except that the + // lane size is not respected when interpreting access_mask: unaligned bits + // must be zeroed. + // + // This function asserts that this mask is non-zero. + // + // future_access_mask: + // The lanes to be printed by a future invocation. This must be specified + // because vertical lines are drawn for partial accesses that haven't yet + // been printed. The format is the same as for access_mask. + // + // If a lane is active in both `access_mask` and `future_access_mask`, + // `access_mask` takes precedence. + // + // struct_element_count: + // The number of elements in each structure. For non-structured accesses, + // set this to one. Along with lane_size_in_bytes, this is used to determine + // the size of each access, and to format the accessed value. + // + // op: + // For stores, use "->". For loads, use "<-". + // + // address: + // The address of this partial access. (Not the base address of the whole + // instruction.) The traced value is read from this address (according to + // struct_element_count and lane_size_in_bytes) so it must be accessible, and when + // tracing stores, the store must have been executed before this function + // is called. + // + // reg_size_in_bytes: + // The size of the register being accessed. This helper is usually used + // for V registers or Q-sized chunks of Z registers, so that is the + // default, but it is possible to use this to annotate X register + // accesses by specifying kXRegSizeInBytes. + // + // The return value is a future_access_mask suitable for the next iteration, + // so that it is possible to execute this in a loop, until the mask is zero. + // Note that access_mask must still be updated by the caller for each call. + uint16_t PrintPartialAccess(uint16_t access_mask, + uint16_t future_access_mask, + int struct_element_count, + int lane_size_in_bytes, + const char* op, + uintptr_t address, + int reg_size_in_bytes = kQRegSizeInBytes); + + // Print an abstract register value. This works for all register types, and + // can print parts of registers. This exists to ensure consistent formatting + // of values. + void PrintRegisterValue(const uint8_t* value, + int value_size, + PrintRegisterFormat format); + template <typename T> + void PrintRegisterValue(const T& sim_register, PrintRegisterFormat format) { + PrintRegisterValue(sim_register.GetBytes(), + std::min(sim_register.GetSizeInBytes(), + kQRegSizeInBytes), + format); } - // Helper functions for register tracing.
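
The return-value convention documented above lets the caller drive PrintPartialAccess in a loop until the mask is exhausted. A toy stand-in showing the shape of that loop (a sketch; PrintPartialAccessSketch is hypothetical and only mimics the mask bookkeeping, not the simulator's private member):

    #include <cstdint>
    #include <cstdio>

    // Stand-in: prints one contiguous chunk and, like the real helper,
    // returns the mask of lanes that future calls will print.
    static uint16_t PrintPartialAccessSketch(uint16_t access_mask,
                                             uint16_t future_access_mask) {
      std::printf("access covers lane bytes 0x%04x\n",
                  static_cast<unsigned>(access_mask));
      return future_access_mask;
    }

    int main() {
      // A two-part access: bytes 0-3 first, then bytes 8-11 of a Q-sized chunk.
      uint16_t mask = 0x000f;
      uint16_t future = 0x0f00;
      while (mask != 0) {
        mask = PrintPartialAccessSketch(mask, future);
        future = 0;  // No accesses remain after the second part.
      }
      return 0;
    }
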
- void PrintRegisterRawHelper(unsigned code, - Reg31Mode r31mode, - int size_in_bytes = kXRegSizeInBytes); - void PrintVRegisterRawHelper(unsigned code, - int bytes = kQRegSizeInBytes, - int lsb = 0); - void PrintVRegisterFPHelper(unsigned code, - unsigned lane_size_in_bytes, - int lane_count = 1, - int rightmost_lane = 0); + // As above, but format as an SVE predicate value, using binary notation with + // spaces between each bit so that they align with the Z register bytes that + // they predicate. + void PrintPRegisterValue(uint16_t value); + + void PrintRegisterValueFPAnnotations(const uint8_t* value, + uint16_t lane_mask, + PrintRegisterFormat format); + template <typename T> + void PrintRegisterValueFPAnnotations(const T& sim_register, + uint16_t lane_mask, + PrintRegisterFormat format) { + PrintRegisterValueFPAnnotations(sim_register.GetBytes(), lane_mask, format); + } + template <typename T> + void PrintRegisterValueFPAnnotations(const T& sim_register, + PrintRegisterFormat format) { + PrintRegisterValueFPAnnotations(sim_register.GetBytes(), + GetPrintRegLaneMask(format), + format); + } VIXL_NO_RETURN void DoUnreachable(const Instruction* instr); void DoTrace(const Instruction* instr); @@ -1587,10 +2324,13 @@ class Simulator : public DecoderVisitor { Reg31Mode mode = Reg31IsZeroRegister); static const char* XRegNameForCode(unsigned code, Reg31Mode mode = Reg31IsZeroRegister); + static const char* BRegNameForCode(unsigned code); static const char* HRegNameForCode(unsigned code); static const char* SRegNameForCode(unsigned code); static const char* DRegNameForCode(unsigned code); static const char* VRegNameForCode(unsigned code); + static const char* ZRegNameForCode(unsigned code); + static const char* PRegNameForCode(unsigned code); bool IsColouredTrace() const { return coloured_trace_; } VIXL_DEPRECATED("IsColouredTrace", bool coloured_trace() const) { @@ -1609,18 +2349,28 @@ class Simulator : public DecoderVisitor { return GetTraceParameters(); } + bool ShouldTraceWrites() const { + return (GetTraceParameters() & LOG_WRITE) != 0; + } + bool ShouldTraceRegs() const { + return (GetTraceParameters() & LOG_REGS) != 0; + } + bool ShouldTraceVRegs() const { + return (GetTraceParameters() & LOG_VREGS) != 0; + } + bool ShouldTraceSysRegs() const { + return (GetTraceParameters() & LOG_SYSREGS) != 0; + } + bool ShouldTraceBranches() const { + return (GetTraceParameters() & LOG_BRANCH) != 0; + } + void SetTraceParameters(int parameters); VIXL_DEPRECATED("SetTraceParameters", void set_trace_parameters(int parameters)) { SetTraceParameters(parameters); } - void SetInstructionStats(bool value); - VIXL_DEPRECATED("SetInstructionStats", - void set_instruction_stats(bool value)) { - SetInstructionStats(value); - } - // Clear the simulated local monitor to force the next store-exclusive // instruction to fail. void ClearLocalMonitor() { local_monitor_.Clear(); } @@ -1803,6 +2553,92 @@ class Simulator : public DecoderVisitor { }; #endif + // Configure the simulated value of 'VL', which is the size of a Z register. + // Because this cannot occur during a program's lifetime, this function also + // resets the SVE registers. 
+ void SetVectorLengthInBits(unsigned vector_length); + + unsigned GetVectorLengthInBits() const { return vector_length_; } + unsigned GetVectorLengthInBytes() const { + VIXL_ASSERT((vector_length_ % kBitsPerByte) == 0); + return vector_length_ / kBitsPerByte; + } + unsigned GetPredicateLengthInBits() const { + VIXL_ASSERT((GetVectorLengthInBits() % kZRegBitsPerPRegBit) == 0); + return GetVectorLengthInBits() / kZRegBitsPerPRegBit; + } + unsigned GetPredicateLengthInBytes() const { + VIXL_ASSERT((GetVectorLengthInBytes() % kZRegBitsPerPRegBit) == 0); + return GetVectorLengthInBytes() / kZRegBitsPerPRegBit; + } + + unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) const { + if (IsSVEFormat(vform)) { + return GetVectorLengthInBits(); + } else { + return vixl::aarch64::RegisterSizeInBitsFromFormat(vform); + } + } + + unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) const { + unsigned size_in_bits = RegisterSizeInBitsFromFormat(vform); + VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0); + return size_in_bits / kBitsPerByte; + } + + int LaneCountFromFormat(VectorFormat vform) const { + if (IsSVEFormat(vform)) { + return GetVectorLengthInBits() / LaneSizeInBitsFromFormat(vform); + } else { + return vixl::aarch64::LaneCountFromFormat(vform); + } + } + + bool IsFirstActive(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (mask.IsActive(vform, i)) { + return bits.IsActive(vform, i); + } + } + return false; + } + + bool AreNoneActive(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (mask.IsActive(vform, i) && bits.IsActive(vform, i)) { + return false; + } + } + return true; + } + + bool IsLastActive(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { + if (mask.IsActive(vform, i)) { + return bits.IsActive(vform, i); + } + } + return false; + } + + void PredTest(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + ReadNzcv().SetN(IsFirstActive(vform, mask, bits)); + ReadNzcv().SetZ(AreNoneActive(vform, mask, bits)); + ReadNzcv().SetC(!IsLastActive(vform, mask, bits)); + ReadNzcv().SetV(0); + LogSystemRegister(NZCV); + } + + SimPRegister& GetPTrue() { return pregister_all_true_; } + protected: const char* clr_normal; const char* clr_flag_name; @@ -1811,6 +2647,8 @@ class Simulator : public DecoderVisitor { const char* clr_reg_value; const char* clr_vreg_name; const char* clr_vreg_value; + const char* clr_preg_name; + const char* clr_preg_value; const char* clr_memory_address; const char* clr_warning; const char* clr_warning_message; @@ -1818,6 +2656,13 @@ class Simulator : public DecoderVisitor { const char* clr_branch_marker; // Simulation helpers ------------------------------------ + + void ResetSystemRegisters(); + void ResetRegisters(); + void ResetVRegisters(); + void ResetPRegisters(); + void ResetFFR(); + bool ConditionPassed(Condition cond) { switch (cond) { case eq: @@ -1907,7 +2752,7 @@ class Simulator : public DecoderVisitor { } int64_t ShiftOperand(unsigned reg_size, - int64_t value, + uint64_t value, Shift shift_type, unsigned amount) const; int64_t ExtendValue(unsigned reg_width, @@ -1919,6 +2764,11 @@ class Simulator : public DecoderVisitor { void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr); void ld1(VectorFormat vform, LogicVRegister dst, int 
index, uint64_t addr); void ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr); + void ld1r(VectorFormat vform, + VectorFormat unpack_vform, + LogicVRegister dst, + uint64_t addr, + bool is_signed = false); void ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, @@ -2020,16 +2870,43 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + // Add `value` to each lane of `src1`, treating `value` as unsigned for the + // purposes of setting the saturation flags. + LogicVRegister add_uint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + uint64_t value); LogicVRegister addp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + LogicPRegister brka(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn); + LogicPRegister brkb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn); + LogicPRegister brkn(LogicPRegister pdm, + const LogicPRegister& pg, + const LogicPRegister& pn); + LogicPRegister brkpa(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm); + LogicPRegister brkpb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm); + // dst = srca + src1 * src2 LogicVRegister mla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); + // dst = srca - src1 * src2 LogicVRegister mls(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister mul(VectorFormat vform, @@ -2055,6 +2932,14 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + LogicVRegister sdiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister udiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform, LogicVRegister dst, @@ -2101,6 +2986,10 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src1, const LogicVRegister& src2, int index); + LogicVRegister smulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); LogicVRegister smull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -2161,6 +3050,10 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src1, const LogicVRegister& src2, int index); + LogicVRegister umulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); LogicVRegister sqdmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -2225,6 +3118,12 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + // Subtract `value` from each lane of `src1`, treating `value` as unsigned for + // the purposes of setting the saturation flags. 
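
The comments on add_uint and sub_uint here hinge on a distinction that is easy to miss: a 64-bit value of all ones is -1 when read as signed, but an enormous addend when read as unsigned, and only the unsigned reading feeds the saturation flags. A standalone illustration of why the two views differ (assumed semantics, not VIXL internals):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t lane = 5;
      uint64_t value = UINT64_C(0xffffffffffffffff);
      uint64_t result = lane + value;  // Modular arithmetic: wraps to 4.
      // Signed view: 5 + (-1) = 4, no overflow at all. Unsigned view: the true
      // sum exceeds UINT64_MAX (the result wrapped below `lane`), so an
      // unsigned saturation event occurred.
      bool unsigned_saturation = result < lane;
      std::printf("result=%llu saturated=%d\n",
                  static_cast<unsigned long long>(result), unsigned_saturation);
      return 0;
    }
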
+ LogicVRegister sub_uint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + uint64_t value); LogicVRegister and_(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -2267,6 +3166,9 @@ class Simulator : public DecoderVisitor { LogicVRegister clz(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); + LogicVRegister cnot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); LogicVRegister cnt(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); @@ -2278,8 +3180,11 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src); LogicVRegister rev(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src, - int revSize); + const LogicVRegister& src); + LogicVRegister rev_byte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int rev_size); LogicVRegister rev16(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); @@ -2327,6 +3232,7 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, + const LogicVRegister& acc, int index, int rot); LogicVRegister fcmla(VectorFormat vform, @@ -2335,17 +3241,25 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src2, int index, int rot); - template <typename T> LogicVRegister fcmla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, + const LogicVRegister& acc, int rot); - LogicVRegister fcmla(VectorFormat vform, + template <typename T> + LogicVRegister fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister index(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int rot); + uint64_t start, + uint64_t step); LogicVRegister ins_element(VectorFormat vform, LogicVRegister dst, int dst_index, @@ -2355,13 +3269,36 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, int dst_index, uint64_t imm); + LogicVRegister insr(VectorFormat vform, LogicVRegister dst, uint64_t imm); LogicVRegister dup_element(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int src_index); + LogicVRegister dup_elements_to_segments(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int src_index); LogicVRegister dup_immediate(VectorFormat vform, LogicVRegister dst, uint64_t imm); + LogicVRegister mov(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicPRegister mov(LogicPRegister dst, const LogicPRegister& src); + LogicVRegister mov_merging(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src); + LogicVRegister mov_zeroing(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src); + LogicPRegister mov_merging(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); + LogicPRegister mov_zeroing(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); LogicVRegister movi(VectorFormat vform, LogicVRegister dst, uint64_t imm); LogicVRegister mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm); LogicVRegister orr(VectorFormat vform, @@ -2376,6 +3313,32 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + // Perform a 
"conditional last" operation. The first part of the pair is true + // if any predicate lane is active, false otherwise. The second part takes the + // value of the last active (plus offset) lane, or last (plus offset) lane if + // none active. + std::pair<bool, uint64_t> clast(VectorFormat vform, + const LogicPRegister& pg, + const LogicVRegister& src2, + int offset_from_last_active); + LogicVRegister compact(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister splice(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister sel(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicPRegister sel(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src1, + const LogicPRegister& src2); LogicVRegister sminmax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -2416,6 +3379,7 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src); LogicVRegister sminmaxv(VectorFormat vform, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, bool max); LogicVRegister smaxv(VectorFormat vform, @@ -2436,6 +3400,14 @@ class Simulator : public DecoderVisitor { LogicVRegister sxtl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); + LogicVRegister uxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits); + LogicVRegister sxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits); LogicVRegister tbl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, @@ -2460,6 +3432,10 @@ class Simulator : public DecoderVisitor { const LogicVRegister& ind); LogicVRegister Table(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& src, + const LogicVRegister& tab); + LogicVRegister Table(VectorFormat vform, + LogicVRegister dst, const LogicVRegister& ind, bool zero_out_of_bounds, const LogicVRegister* tab1, @@ -2580,6 +3556,7 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src2); LogicVRegister uminmaxv(VectorFormat vform, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, bool max); LogicVRegister umaxv(VectorFormat vform, @@ -2617,11 +3594,27 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src, int shift); LogicVRegister scvtf(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); + LogicVRegister scvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, FPRounding rounding_mode); LogicVRegister ucvtf(VectorFormat vform, + unsigned dst_data_size, + unsigned src_data_size, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); + LogicVRegister ucvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, @@ -2706,9 +3699,9 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src); LogicVRegister extractnarrow(VectorFormat vform, LogicVRegister dst, - bool dstIsSigned, + bool dst_is_signed, const LogicVRegister& src, - bool srcIsSigned); + bool src_is_signed); LogicVRegister xtn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); @@ -2725,7 
+3718,7 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, - bool issigned); + bool is_signed); LogicVRegister saba(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -2951,19 +3944,23 @@ class Simulator : public DecoderVisitor { template <typename T> LogicVRegister fmla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister fmla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); template <typename T> LogicVRegister fmls(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister fmls(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister fnmul(VectorFormat vform, @@ -3023,6 +4020,31 @@ class Simulator : public DecoderVisitor { LogicVRegister frecpx(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); + LogicVRegister ftsmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ftssel(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ftmad(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + unsigned index); + LogicVRegister fexpa(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + template <typename T> + LogicVRegister fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); template <typename T> LogicVRegister fabs_(VectorFormat vform, LogicVRegister dst, @@ -3034,19 +4056,40 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); - LogicVRegister frint(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, bool inexact_exception = false, FrintMode frint_mode = kFrintToInteger); + LogicVRegister fcvt(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister fcvts(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); LogicVRegister fcvts(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, int fbits = 0); LogicVRegister fcvtu(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); + LogicVRegister fcvtu(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, @@ -3086,16 +4129,78 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src); + LogicPRegister pfalse(LogicPRegister dst); + LogicPRegister pfirst(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); + LogicPRegister ptrue(VectorFormat vform, LogicPRegister dst, int 
pattern); + LogicPRegister pnext(VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); + + LogicVRegister asrd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + int shift); + + LogicVRegister andv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister eorv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister orv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister saddv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister sminv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister smaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister uaddv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister uminv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister umaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + template <typename T> - struct TFPMinMaxOp { + struct TFPPairOp { typedef T (Simulator::*type)(T a, T b); }; template <typename T> - LogicVRegister fminmaxv(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - typename TFPMinMaxOp<T>::type Op); + LogicVRegister FPPairedAcrossHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp<T>::type fn, + uint64_t inactive_value); + + LogicVRegister FPPairedAcrossHelper( + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp<vixl::internal::SimFloat16>::type fn16, + typename TFPPairOp<float>::type fn32, + typename TFPPairOp<double>::type fn64, + uint64_t inactive_value); LogicVRegister fminv(VectorFormat vform, LogicVRegister dst, @@ -3109,6 +4214,9 @@ class Simulator : public DecoderVisitor { LogicVRegister fmaxnmv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); + LogicVRegister faddv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); static const uint32_t CRC32_POLY = 0x04C11DB7; static const uint32_t CRC32C_POLY = 0x1EDC6F41; @@ -3209,6 +4317,129 @@ class Simulator : public DecoderVisitor { void DoSaveCPUFeatures(const Instruction* instr); void DoRestoreCPUFeatures(const Instruction* instr); + // General arithmetic helpers ---------------------------- + + // Add `delta` to the accumulator (`acc`), optionally saturate, then zero- or + // sign-extend. Initial `acc` bits outside `n` are ignored, but the delta must + // be a valid int<n>_t. 
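
A concrete 8-bit case of the accumulate-saturate-extend behaviour described in the comment above (the IncDecN declaration follows below); this is a standalone sketch under assumed semantics, not the simulator's implementation:

    #include <cstdint>
    #include <cstdio>

    // Saturating variant for n == 8: sign-extend the low 8 bits of `acc`, add
    // the delta, clamp to the 8-bit signed range, then truncate back.
    static uint8_t IncDec8(uint8_t acc, int64_t delta, bool saturate) {
      int64_t sum = static_cast<int8_t>(acc) + delta;
      if (saturate) {
        if (sum > INT8_MAX) sum = INT8_MAX;
        if (sum < INT8_MIN) sum = INT8_MIN;
      }
      return static_cast<uint8_t>(sum);  // Wraps modulo 256 when not saturating.
    }

    int main() {
      // 120 + 16 = 136: clamps to 127 when saturating, wraps to -120 otherwise.
      std::printf("%d\n", static_cast<int8_t>(IncDec8(120, 16, true)));
      std::printf("%d\n", static_cast<int8_t>(IncDec8(120, 16, false)));
      return 0;
    }
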
+ uint64_t IncDecN(uint64_t acc, + int64_t delta, + unsigned n, + bool is_saturating = false, + bool is_signed = false); + + // SVE helpers ------------------------------------------- + LogicVRegister SVEBitwiseLogicalUnpredicatedHelper(LogicalOp op, + VectorFormat vform, + LogicVRegister zd, + const LogicVRegister& zn, + const LogicVRegister& zm); + + LogicPRegister SVEPredicateLogicalHelper(SVEPredicateLogicalOp op, + LogicPRegister Pd, + const LogicPRegister& pn, + const LogicPRegister& pm); + + LogicVRegister SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op, + VectorFormat vform, + LogicVRegister zd, + uint64_t imm); + enum UnpackType { kHiHalf, kLoHalf }; + enum ExtendType { kSignedExtend, kUnsignedExtend }; + LogicVRegister unpk(VectorFormat vform, + LogicVRegister zd, + const LogicVRegister& zn, + UnpackType unpack_type, + ExtendType extend_type); + + LogicPRegister SVEIntCompareVectorsHelper(Condition cc, + VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& mask, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements = false, + FlagsUpdate flags = SetFlags); + + void SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr, + VectorFormat vform, + SVEOffsetModifier mod); + + // Store each active zt<i>[lane] to `addr.GetElementAddress(lane, ...)`. + // + // `zt_code` specifies the code of the first register (zt). Each additional + // register (up to `reg_count`) is `(zt_code + i) % 32`. + // + // This helper calls LogZWrite in the proper way, according to `addr`. + void SVEStructuredStoreHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr); + // Load each active zt<i>[lane] from `addr.GetElementAddress(lane, ...)`. + void SVEStructuredLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + bool is_signed = false); + + enum SVEFaultTolerantLoadType { + // - Elements active in both FFR and pg are accessed as usual. If the access + // fails, the corresponding lane and all subsequent lanes are filled with + // an unpredictable value, and made inactive in FFR. + // + // - Elements active in FFR but not pg are set to zero. + // + // - Elements that are not active in FFR are filled with an unpredictable + // value, regardless of pg. + kSVENonFaultLoad, + + // If type == kSVEFirstFaultLoad, the behaviour is the same, except that the + // first active element is always accessed, regardless of FFR, and will + // generate a real fault if it is inaccessible. If the lane is not active in + // FFR, the actual value loaded into the result is still unpredictable. + kSVEFirstFaultLoad + }; + + // Load with first-faulting or non-faulting load semantics, respecting and + // updating FFR. + void SVEFaultTolerantLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + SVEFaultTolerantLoadType type, + bool is_signed); + + LogicVRegister SVEBitwiseShiftHelper(Shift shift_op, + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements); + + template <typename T> + LogicVRegister FTMaddHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + uint64_t coeff_pos, + uint64_t coeff_neg); + + // Return the first or last active lane, or -1 if none are active. 
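
The first/last-active scans declared just below have a simple shape; over a plain boolean predicate they reduce to the following (standalone sketch, not the LogicPRegister-based code):

    #include <cstdio>

    static int GetFirstActive(const bool* pg, int lane_count) {
      for (int i = 0; i < lane_count; i++) {
        if (pg[i]) return i;
      }
      return -1;  // No active lanes.
    }

    static int GetLastActive(const bool* pg, int lane_count) {
      for (int i = lane_count - 1; i >= 0; i--) {
        if (pg[i]) return i;
      }
      return -1;
    }

    int main() {
      bool pg[] = {false, true, false, true};
      std::printf("%d %d\n", GetFirstActive(pg, 4), GetLastActive(pg, 4));  // 1 3
      return 0;
    }
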
+ int GetFirstActive(VectorFormat vform, const LogicPRegister& pg) const; + int GetLastActive(VectorFormat vform, const LogicPRegister& pg) const; + + int CountActiveLanes(VectorFormat vform, const LogicPRegister& pg) const; + + // Count active and true lanes in `pn`. + int CountActiveAndTrueLanes(VectorFormat vform, + const LogicPRegister& pg, + const LogicPRegister& pn) const; + + // Count the number of lanes referred to by `pattern`, given the vector + // length. If `pattern` is not a recognised SVEPredicateConstraint, this + // returns zero. + int GetPredicateConstraintLaneCount(VectorFormat vform, int pattern) const; + // Simulate a runtime call. void DoRuntimeCall(const Instruction* instr); @@ -3222,15 +4453,21 @@ class Simulator : public DecoderVisitor { FILE* stream_; PrintDisassembler* print_disasm_; - // Instruction statistics instrumentation. - Instrument* instrumentation_; - // General purpose registers. Register 31 is the stack pointer. SimRegister registers_[kNumberOfRegisters]; // Vector registers SimVRegister vregisters_[kNumberOfVRegisters]; + // SVE predicate registers. + SimPRegister pregisters_[kNumberOfPRegisters]; + + // SVE first-fault register. + SimFFRRegister ffr_register_; + + // A pseudo SVE predicate register with all bits set to true. + SimPRegister pregister_all_true_; + // Program Status Register. // bits[31, 27]: Condition flags N, Z, C, and V. // (Negative, Zero, Carry, Overflow) @@ -3266,8 +4503,10 @@ class Simulator : public DecoderVisitor { // Stack byte* stack_; static const int stack_protection_size_ = 256; - // 2 KB stack. - static const int stack_size_ = 2 * 1024 + 2 * stack_protection_size_; + // 8 KB stack. + // TODO: Make this configurable, or automatically allocate space as it runs + // out (like the OS would try to do). + static const int stack_size_ = 8 * 1024 + 2 * stack_protection_size_; byte* stack_limit_; Decoder* decoder_; @@ -3276,6 +4515,10 @@ class Simulator : public DecoderVisitor { bool pc_modified_; const Instruction* pc_; + // If non-NULL, the last instruction was a movprfx, and validity needs to be + // checked. + Instruction const* movprfx_; + // Branch type register, used for branch target identification. BType btype_; @@ -3289,10 +4532,13 @@ class Simulator : public DecoderVisitor { static const char* xreg_names[]; static const char* wreg_names[]; + static const char* breg_names[]; static const char* hreg_names[]; static const char* sreg_names[]; static const char* dreg_names[]; static const char* vreg_names[]; + static const char* zreg_names[]; + static const char* preg_names[]; private: static const PACKey kPACKeyIA; @@ -3301,6 +4547,13 @@ class Simulator : public DecoderVisitor { static const PACKey kPACKeyDB; static const PACKey kPACKeyGA; + bool CanReadMemory(uintptr_t address, size_t size); + + // CanReadMemory needs dummy file descriptors, so we use a pipe. We can save + // some system call overhead by opening them on construction, rather than on + // every call to CanReadMemory. + int dummy_pipe_fd_[2]; + template <typename T> static T FPDefaultNaN(); @@ -3353,14 +4606,24 @@ class Simulator : public DecoderVisitor { } } + // Construct a SimVRegister from a SimPRegister, where each byte-sized lane of + // the destination is set to all true (0xff) when the corresponding + // predicate flag is set, and false (0x00) otherwise. + SimVRegister ExpandToSimVRegister(const SimPRegister& preg); + + // Set each predicate flag in pd where the corresponding assigned-sized lane + // in vreg is non-zero. Clear the flag, otherwise. 
This is almost the opposite + // operation to ExpandToSimVRegister(), except that any non-zero lane is + // interpreted as true. + void ExtractFromSimVRegister(VectorFormat vform, + SimPRegister& pd, // NOLINT(runtime/references) + SimVRegister vreg); + bool coloured_trace_; // A set of TraceParameters flags. int trace_parameters_; - // Indicates whether the instruction instrumentation is active. - bool instruction_stats_; - // Indicates whether the exclusive-access warning has been printed. bool print_exclusive_access_warning_; void PrintExclusiveAccessWarning(); @@ -3368,8 +4631,14 @@ class Simulator : public DecoderVisitor { CPUFeaturesAuditor cpu_features_auditor_; std::vector<CPUFeatures> saved_cpu_features_; - // The simulated state of RNDR and RNDRRS for generating a random number. - uint16_t rndr_state_[3]; + // State for *rand48 functions, used to simulate randomness with repeatable + // behaviour (so that tests are deterministic). This is used to simulate RNDR + // and RNDRRS, as well as to simulate a source of entropy for architecturally + // undefined behaviour. + uint16_t rand_state_[3]; + + // A configurable size of SVE vector registers. + unsigned vector_length_; }; #if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) && __cplusplus < 201402L diff --git a/src/aarch64/simulator-constants-aarch64.h b/src/aarch64/simulator-constants-aarch64.h index 3256f30e..e2389f11 100644 --- a/src/aarch64/simulator-constants-aarch64.h +++ b/src/aarch64/simulator-constants-aarch64.h @@ -121,7 +121,7 @@ const unsigned kTraceLength = 3 * kInstructionSize; enum TraceParameters { LOG_DISASM = 1 << 0, // Log disassembly. LOG_REGS = 1 << 1, // Log general purpose registers. - LOG_VREGS = 1 << 2, // Log NEON and floating-point registers. + LOG_VREGS = 1 << 2, // Log SVE, NEON and floating-point registers. LOG_SYSREGS = 1 << 3, // Log the flags and system registers. LOG_WRITE = 1 << 4, // Log writes to memory. LOG_BRANCH = 1 << 5, // Log taken branches. diff --git a/src/cpu-features.cc b/src/cpu-features.cc index ea1e0d3e..08db3f44 100644 --- a/src/cpu-features.cc +++ b/src/cpu-features.cc @@ -37,31 +37,9 @@ namespace vixl { -static uint64_t MakeFeatureMask(CPUFeatures::Feature feature) { - if (feature == CPUFeatures::kNone) { - return 0; - } else { - // Check that the shift is well-defined, and that the feature is valid. - VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures <= - (sizeof(uint64_t) * 8)); - VIXL_ASSERT(feature < CPUFeatures::kNumberOfFeatures); - return UINT64_C(1) << feature; - } -} - -CPUFeatures::CPUFeatures(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) - : features_(0) { - Combine(feature0, feature1, feature2, feature3); -} - CPUFeatures CPUFeatures::All() { CPUFeatures all; - // Check that the shift is well-defined. 
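
The uint64_t feature mask manipulated by the code being deleted here is replaced by std::bitset throughout (see the cpu-features.h hunk below). In isolation, the bitset operations the new implementation leans on:

    #include <bitset>
    #include <cstdio>

    int main() {
      std::bitset<8> features;                 // Value-initialised: all clear.
      features.set(3);                         // Combine one feature.
      std::printf("%zu\n", features.count());  // 1
      features.set();                          // Like CPUFeatures::All().
      std::printf("%zu\n", features.count());  // 8
      features.reset(3);                       // Remove one feature.
      std::printf("%d\n", static_cast<int>(features.test(3)));  // 0
      return 0;
    }
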
- VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures < (sizeof(uint64_t) * 8)); - all.features_ = (UINT64_C(1) << kNumberOfFeatures) - 1; + all.features_.set(); return all; } @@ -89,74 +67,27 @@ void CPUFeatures::Combine(const CPUFeatures& other) { features_ |= other.features_; } -void CPUFeatures::Combine(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) { - features_ |= MakeFeatureMask(feature0); - features_ |= MakeFeatureMask(feature1); - features_ |= MakeFeatureMask(feature2); - features_ |= MakeFeatureMask(feature3); +void CPUFeatures::Combine(Feature feature) { + if (feature != CPUFeatures::kNone) features_.set(feature); } void CPUFeatures::Remove(const CPUFeatures& other) { features_ &= ~other.features_; } -void CPUFeatures::Remove(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) { - features_ &= ~MakeFeatureMask(feature0); - features_ &= ~MakeFeatureMask(feature1); - features_ &= ~MakeFeatureMask(feature2); - features_ &= ~MakeFeatureMask(feature3); -} - -CPUFeatures CPUFeatures::With(const CPUFeatures& other) const { - CPUFeatures f(*this); - f.Combine(other); - return f; -} - -CPUFeatures CPUFeatures::With(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) const { - CPUFeatures f(*this); - f.Combine(feature0, feature1, feature2, feature3); - return f; -} - -CPUFeatures CPUFeatures::Without(const CPUFeatures& other) const { - CPUFeatures f(*this); - f.Remove(other); - return f; -} - -CPUFeatures CPUFeatures::Without(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) const { - CPUFeatures f(*this); - f.Remove(feature0, feature1, feature2, feature3); - return f; +void CPUFeatures::Remove(Feature feature) { + if (feature != CPUFeatures::kNone) features_.reset(feature); } bool CPUFeatures::Has(const CPUFeatures& other) const { return (features_ & other.features_) == other.features_; } -bool CPUFeatures::Has(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) const { - uint64_t mask = MakeFeatureMask(feature0) | MakeFeatureMask(feature1) | - MakeFeatureMask(feature2) | MakeFeatureMask(feature3); - return (features_ & mask) == mask; +bool CPUFeatures::Has(Feature feature) const { + return (feature == CPUFeatures::kNone) || features_[feature]; } -size_t CPUFeatures::Count() const { return CountSetBits(features_); } +size_t CPUFeatures::Count() const { return features_.count(); } std::ostream& operator<<(std::ostream& os, CPUFeatures::Feature feature) { // clang-format off @@ -177,12 +108,9 @@ VIXL_CPU_FEATURE_LIST(VIXL_FORMAT_FEATURE) } CPUFeatures::const_iterator CPUFeatures::begin() const { - if (features_ == 0) return const_iterator(this, kNone); - - int feature_number = CountTrailingZeros(features_); - vixl::CPUFeatures::Feature feature = - static_cast<CPUFeatures::Feature>(feature_number); - return const_iterator(this, feature); + // For iterators in general, it's undefined to increment `end()`, but here we + // control the implementation and it is safe to do this. 
+ return ++end(); } CPUFeatures::const_iterator CPUFeatures::end() const { @@ -190,11 +118,11 @@ CPUFeatures::const_iterator CPUFeatures::end() const { } std::ostream& operator<<(std::ostream& os, const CPUFeatures& features) { - CPUFeatures::const_iterator it = features.begin(); - while (it != features.end()) { - os << *it; - ++it; - if (it != features.end()) os << ", "; + bool need_separator = false; + for (CPUFeatures::Feature feature : features) { + if (need_separator) os << ", "; + need_separator = true; + os << feature; } return os; } @@ -205,7 +133,7 @@ bool CPUFeaturesConstIterator::operator==( return (cpu_features_ == other.cpu_features_) && (feature_ == other.feature_); } -CPUFeatures::Feature CPUFeaturesConstIterator::operator++() { // Prefix +CPUFeaturesConstIterator& CPUFeaturesConstIterator::operator++() { // Prefix VIXL_ASSERT(IsValid()); do { // Find the next feature. The order is unspecified. @@ -219,11 +147,11 @@ CPUFeatures::Feature CPUFeaturesConstIterator::operator++() { // Prefix // cpu_features_->Has(kNone) is always true, so this will terminate even if // the features list is empty. } while (!cpu_features_->Has(feature_)); - return feature_; + return *this; } -CPUFeatures::Feature CPUFeaturesConstIterator::operator++(int) { // Postfix - CPUFeatures::Feature result = feature_; +CPUFeaturesConstIterator CPUFeaturesConstIterator::operator++(int) { // Postfix + CPUFeaturesConstIterator result = *this; ++(*this); return result; } diff --git a/src/cpu-features.h b/src/cpu-features.h index 50ddc267..1b0f2c24 100644 --- a/src/cpu-features.h +++ b/src/cpu-features.h @@ -27,6 +27,7 @@ #ifndef VIXL_CPU_FEATURES_H #define VIXL_CPU_FEATURES_H +#include <bitset> #include <ostream> #include "globals-vixl.h" @@ -34,16 +35,65 @@ namespace vixl { +// VIXL aims to handle and detect all architectural features that are likely to +// influence code-generation decisions at EL0 (user-space). +// +// - There may be multiple VIXL feature flags for a given architectural +// extension. This occurs where the extension allows components to be +// implemented independently, or where kernel support is needed, and is likely +// to be fragmented. +// +// For example, Pointer Authentication (kPAuth*) has a separate feature flag +// for access to PACGA, and to indicate that the QARMA algorithm is +// implemented. +// +// - Conversely, some extensions have configuration options that do not affect +// EL0, so these are presented as a single VIXL feature. +// +// For example, the RAS extension (kRAS) has several variants, but the only +// feature relevant to VIXL is the addition of the ESB instruction so we only +// need a single flag. +// +// - VIXL offers separate flags for separate features even if they're +// architecturally linked. +// +// For example, the architecture requires kFPHalf and kNEONHalf to be equal, +// but they have separate hardware ID register fields so VIXL presents them as +// separate features. +// +// - VIXL can detect every feature for which it can generate code. +// +// - VIXL can detect some features for which it cannot generate code. +// +// The CPUFeatures::Feature enum (derived from the macro list below) is +// frequently extended. New features may be added to the list at any point, and +// no assumptions should be made about the numerical values assigned to each +// enum constant. The symbolic names can be considered to be stable. +// +// The debug descriptions are used only for debug output.
The 'cpuinfo' strings +// are informative; VIXL does not use /proc/cpuinfo for feature detection. + // clang-format off #define VIXL_CPU_FEATURE_LIST(V) \ /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \ /* registers, so that the detailed feature registers can be read */ \ /* directly. */ \ + \ + /* Constant name Debug description Linux 'cpuinfo' string. */ \ V(kIDRegisterEmulation, "ID register emulation", "cpuid") \ \ V(kFP, "FP", "fp") \ V(kNEON, "NEON", "asimd") \ V(kCRC32, "CRC32", "crc32") \ + V(kDGH, "DGH", "dgh") \ + /* Speculation control features. */ \ + V(kCSV2, "CSV2", NULL) \ + V(kSCXTNUM, "SCXTNUM", NULL) \ + V(kCSV3, "CSV3", NULL) \ + V(kSB, "SB", "sb") \ + V(kSPECRES, "SPECRES", NULL) \ + V(kSSBS, "SSBS", NULL) \ + V(kSSBSControl, "SSBS (PSTATE control)", "ssbs") \ /* Cryptographic support instructions. */ \ V(kAES, "AES", "aes") \ V(kSHA1, "SHA1", "sha1") \ @@ -58,28 +108,36 @@ namespace vixl { V(kRDM, "RDM", "asimdrdm") \ /* Scalable Vector Extension. */ \ V(kSVE, "SVE", "sve") \ + V(kSVEF64MM, "SVE F64MM", "svef64mm") \ + V(kSVEF32MM, "SVE F32MM", "svef32mm") \ + V(kSVEI8MM, "SVE I8MM", "svei8imm") \ + V(kSVEBF16, "SVE BFloat16", "svebf16") \ /* SDOT and UDOT support (in NEON). */ \ V(kDotProduct, "DotProduct", "asimddp") \ + /* Int8 matrix multiplication (in NEON). */ \ + V(kI8MM, "NEON I8MM", "i8mm") \ /* Half-precision (FP16) support for FP and NEON, respectively. */ \ V(kFPHalf, "FPHalf", "fphp") \ V(kNEONHalf, "NEONHalf", "asimdhp") \ + /* BFloat16 support (in both FP and NEON.) */ \ + V(kBF16, "FP/NEON BFloat 16", "bf16") \ /* The RAS extension, including the ESB instruction. */ \ V(kRAS, "RAS", NULL) \ /* Data cache clean to the point of persistence: DC CVAP. */ \ V(kDCPoP, "DCPoP", "dcpop") \ /* Data cache clean to the point of deep persistence: DC CVADP. */ \ - V(kDCCVADP, "DCCVADP", NULL) \ + V(kDCCVADP, "DCCVADP", "dcpodp") \ /* Cryptographic support instructions. */ \ V(kSHA3, "SHA3", "sha3") \ V(kSHA512, "SHA512", "sha512") \ V(kSM3, "SM3", "sm3") \ V(kSM4, "SM4", "sm4") \ /* Pointer authentication for addresses. */ \ - V(kPAuth, "PAuth", NULL) \ + V(kPAuth, "PAuth", "paca") \ /* Pointer authentication for addresses uses QARMA. */ \ V(kPAuthQARMA, "PAuthQARMA", NULL) \ /* Generic authentication (using the PACGA instruction). */ \ - V(kPAuthGeneric, "PAuthGeneric", NULL) \ + V(kPAuthGeneric, "PAuthGeneric", "pacg") \ /* Generic authentication uses QARMA. */ \ V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \ /* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \ @@ -98,13 +156,21 @@ namespace vixl { /* Data-independent timing (for selected instructions). */ \ V(kDIT, "DIT", "dit") \ /* Branch target identification. */ \ - V(kBTI, "BTI", NULL) \ + V(kBTI, "BTI", "bti") \ /* Flag manipulation instructions: {AX,XA}FLAG */ \ - V(kAXFlag, "AXFlag", NULL) \ + V(kAXFlag, "AXFlag", "flagm2") \ /* Random number generation extension, */ \ - V(kRNG, "RNG", NULL) \ + V(kRNG, "RNG", "rng") \ /* Floating-point round to {32,64}-bit integer. */ \ - V(kFrintToFixedSizedInt,"Frint (bounded)", NULL) + V(kFrintToFixedSizedInt,"Frint (bounded)", "frint") \ + /* Memory Tagging Extension. */ \ + V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \ + V(kMTE, "MTE", NULL) \ + /* PAuth extensions. 
*/ \ + V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \ + V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \ + V(kPAuthFPAC, "PAuth FPAC", NULL) \ + V(kPAuthFPACCombined, "PAuth FPACCombined", NULL) // clang-format on @@ -197,13 +263,13 @@ class CPUFeatures { // clang-format on // By default, construct with no features enabled. - CPUFeatures() : features_(0) {} + CPUFeatures() : features_{} {} // Construct with some features already enabled. - CPUFeatures(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone); + template <typename T, typename... U> + CPUFeatures(T first, U... others) : features_{} { + Combine(first, others...); + } // Construct with all features enabled. This can be used to disable feature // checking: `Has(...)` returns true regardless of the argument. @@ -236,41 +302,59 @@ class CPUFeatures { // exist in this set are left unchanged. void Combine(const CPUFeatures& other); - // Combine specific features into this set. Features that already exist in - // this set are left unchanged. - void Combine(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone); + // Combine a specific feature into this set. If it already exists in the set, + // the set is left unchanged. + void Combine(Feature feature); + + // Combine multiple features (or feature sets) into this set. + template <typename T, typename... U> + void Combine(T first, U... others) { + Combine(first); + Combine(others...); + } // Remove features in another CPUFeatures object from this one. void Remove(const CPUFeatures& other); - // Remove specific features from this set. - void Remove(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone); - - // Chaining helpers for convenient construction. - CPUFeatures With(const CPUFeatures& other) const; - CPUFeatures With(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone) const; - CPUFeatures Without(const CPUFeatures& other) const; - CPUFeatures Without(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone) const; - - // Query features. - // Note that an empty query (like `Has(kNone)`) always returns true. + // Remove a specific feature from this set. This has no effect if the feature + // doesn't exist in the set. + void Remove(Feature feature0); + + // Remove multiple features (or feature sets) from this set. + template <typename T, typename... U> + void Remove(T first, U... others) { + Remove(first); + Remove(others...); + } + + // Chaining helpers for convenient construction by combining other CPUFeatures + // or individual Features. + template <typename... T> + CPUFeatures With(T... others) const { + CPUFeatures f(*this); + f.Combine(others...); + return f; + } + + template <typename... T> + CPUFeatures Without(T... others) const { + CPUFeatures f(*this); + f.Remove(others...); + return f; + } + + // Test whether the `other` feature set is equal to or a subset of this one. bool Has(const CPUFeatures& other) const; - bool Has(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone) const; + + // Test whether a single feature exists in this set. + // Note that `Has(kNone)` always returns true. + bool Has(Feature feature) const; + + // Test whether all of the specified features exist in this set. + template <typename T, typename... U> + bool Has(T first, U... 
others) const { + return Has(first) && Has(others...); + } // Return the number of enabled features. size_t Count() const; @@ -288,9 +372,8 @@ class CPUFeatures { const_iterator end() const; private: - // Each bit represents a feature. This field will be replaced as needed if - // features are added. - uint64_t features_; + // Each bit represents a feature. This set will be extended as needed. + std::bitset<kNumberOfFeatures> features_; friend std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features); @@ -313,8 +396,8 @@ class CPUFeaturesConstIterator { bool operator!=(const CPUFeaturesConstIterator& other) const { return !(*this == other); } - CPUFeatures::Feature operator++(); - CPUFeatures::Feature operator++(int); + CPUFeaturesConstIterator& operator++(); + CPUFeaturesConstIterator operator++(int); CPUFeatures::Feature operator*() const { VIXL_ASSERT(IsValid()); @@ -359,21 +442,17 @@ class CPUFeaturesScope { // Start a CPUFeaturesScope on any object that implements // `CPUFeatures* GetCPUFeatures()`. template <typename T> - explicit CPUFeaturesScope(T* cpu_features_wrapper, - CPUFeatures::Feature feature0 = CPUFeatures::kNone, - CPUFeatures::Feature feature1 = CPUFeatures::kNone, - CPUFeatures::Feature feature2 = CPUFeatures::kNone, - CPUFeatures::Feature feature3 = CPUFeatures::kNone) + explicit CPUFeaturesScope(T* cpu_features_wrapper) : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), - old_features_(*cpu_features_) { - cpu_features_->Combine(feature0, feature1, feature2, feature3); - } + old_features_(*cpu_features_) {} - template <typename T> - CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other) + // Start a CPUFeaturesScope on any object that implements + // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled. + template <typename T, typename U, typename... V> + CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features) : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), old_features_(*cpu_features_) { - cpu_features_->Combine(other); + cpu_features_->Combine(first, features...); } ~CPUFeaturesScope() { *cpu_features_ = old_features_; } diff --git a/src/globals-vixl.h b/src/globals-vixl.h index 640b4b9b..4dc8c024 100644 --- a/src/globals-vixl.h +++ b/src/globals-vixl.h @@ -27,6 +27,10 @@ #ifndef VIXL_GLOBALS_H #define VIXL_GLOBALS_H +#if __cplusplus < 201402L +#error VIXL requires C++14 +#endif + // Get standard C99 macros for integer types. 
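
The C++14 floor declared just above is what enables the variadic template interfaces added to cpu-features.h earlier in this patch. For illustration, calling code can now pass any number of features in one go (a sketch that assumes the patched vixl headers are available):

    #include "cpu-features.h"

    using vixl::CPUFeatures;

    int main() {
      // Construction, Combine() and Has() are no longer capped at four features.
      CPUFeatures f(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kCRC32,
                    CPUFeatures::kAES, CPUFeatures::kSHA1, CPUFeatures::kRDM);
      f.Combine(CPUFeatures::kSVE, CPUFeatures::kSVEBF16);
      return f.Has(CPUFeatures::kFP, CPUFeatures::kSVE) ? 0 : 1;
    }
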
#ifndef __STDC_CONSTANT_MACROS #define __STDC_CONSTANT_MACROS @@ -66,7 +70,8 @@ typedef uint8_t byte; const int KBytes = 1024; const int MBytes = 1024 * KBytes; -const int kBitsPerByte = 8; +const int kBitsPerByteLog2 = 3; +const int kBitsPerByte = 1 << kBitsPerByteLog2; template <int SizeInBits> struct Unsigned; @@ -223,8 +228,11 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {} #if __cplusplus >= 201103L #define VIXL_OVERRIDE override +#define VIXL_CONSTEXPR constexpr +#define VIXL_HAS_CONSTEXPR 1 #else #define VIXL_OVERRIDE +#define VIXL_CONSTEXPR #endif // With VIXL_NEGATIVE_TESTING on, VIXL_ASSERT and VIXL_CHECK will throw diff --git a/src/invalset-vixl.h b/src/invalset-vixl.h index fbfb6a01..8bd6035e 100644 --- a/src/invalset-vixl.h +++ b/src/invalset-vixl.h @@ -842,9 +842,7 @@ InvalSetIterator<S>::InvalSetIterator(const InvalSetIterator<S>& other) #if __cplusplus >= 201103L template <class S> InvalSetIterator<S>::InvalSetIterator(InvalSetIterator<S>&& other) noexcept - : using_vector_(false), - index_(0), - inval_set_(NULL) { + : using_vector_(false), index_(0), inval_set_(NULL) { swap(*this, other); } #endif diff --git a/src/pool-manager-impl.h b/src/pool-manager-impl.h index 66ecd6a4..a1bcaaad 100644 --- a/src/pool-manager-impl.h +++ b/src/pool-manager-impl.h @@ -264,14 +264,14 @@ bool PoolManager<T>::MustEmit(T pc, if (checkpoint < temp.min_location_) return true; } - bool tempNotPlacedYet = true; + bool temp_not_placed_yet = true; for (int i = static_cast<int>(objects_.size()) - 1; i >= 0; --i) { const PoolObject<T>& current = objects_[i]; - if (tempNotPlacedYet && PoolObjectLessThan(current, temp)) { + if (temp_not_placed_yet && PoolObjectLessThan(current, temp)) { checkpoint = UpdateCheckpointForObject(checkpoint, &temp); if (checkpoint < temp.min_location_) return true; if (CheckFuturePC(pc, checkpoint)) return true; - tempNotPlacedYet = false; + temp_not_placed_yet = false; } if (current.label_base_ == label_base) continue; checkpoint = UpdateCheckpointForObject(checkpoint, ¤t); @@ -279,7 +279,7 @@ bool PoolManager<T>::MustEmit(T pc, if (CheckFuturePC(pc, checkpoint)) return true; } // temp is the object with the smallest max_location_. - if (tempNotPlacedYet) { + if (temp_not_placed_yet) { checkpoint = UpdateCheckpointForObject(checkpoint, &temp); if (checkpoint < temp.min_location_) return true; } @@ -497,7 +497,7 @@ PoolManager<T>::~PoolManager<T>() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION { } #endif // Delete objects the pool manager owns. - for (typename std::vector<LocationBase<T> *>::iterator + for (typename std::vector<LocationBase<T>*>::iterator iter = delete_on_destruction_.begin(), end = delete_on_destruction_.end(); iter != end; diff --git a/src/utils-vixl.h b/src/utils-vixl.h index c9287e40..0ae6dfc0 100644 --- a/src/utils-vixl.h +++ b/src/utils-vixl.h @@ -67,7 +67,7 @@ namespace vixl { #endif template <typename T, size_t n> -size_t ArrayLength(const T (&)[n]) { +constexpr size_t ArrayLength(const T (&)[n]) { return n; } @@ -77,25 +77,30 @@ inline uint64_t GetUintMask(unsigned bits) { return base - 1; } +inline uint64_t GetSignMask(unsigned bits) { + VIXL_ASSERT(bits <= 64); + return UINT64_C(1) << (bits - 1); +} + // Check number width. // TODO: Refactor these using templates. 
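
The IsIntN/IsUintN predicates that follow are relaxed so that n may equal the full word width. Their intended meaning is easy to pin down with a small reference implementation and a few boundary checks (standalone sketch, independent of the vixl headers):

    #include <cassert>
    #include <cstdint>

    // Reference semantics: true iff x fits in an n-bit two's-complement integer.
    static bool FitsIntN(unsigned n, int64_t x) {
      if (n == 64) return true;  // Every int64_t fits in 64 bits.
      int64_t limit = INT64_C(1) << (n - 1);
      return (-limit <= x) && (x < limit);
    }

    int main() {
      assert(FitsIntN(8, 127));         // INT8_MAX fits.
      assert(!FitsIntN(8, 128));        // One past INT8_MAX does not.
      assert(FitsIntN(8, -128));        // INT8_MIN fits.
      assert(FitsIntN(64, INT64_MIN));  // n == 64 is now permitted.
      return 0;
    }
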
inline bool IsIntN(unsigned n, uint32_t x) { - VIXL_ASSERT((0 < n) && (n < 32)); - uint32_t limit = UINT32_C(1) << (n - 1); - return x < limit; + VIXL_ASSERT((0 < n) && (n <= 32)); + return x <= static_cast<uint32_t>(INT32_MAX >> (32 - n)); } inline bool IsIntN(unsigned n, int32_t x) { - VIXL_ASSERT((0 < n) && (n < 32)); + VIXL_ASSERT((0 < n) && (n <= 32)); + if (n == 32) return true; int32_t limit = INT32_C(1) << (n - 1); return (-limit <= x) && (x < limit); } inline bool IsIntN(unsigned n, uint64_t x) { - VIXL_ASSERT((0 < n) && (n < 64)); - uint64_t limit = UINT64_C(1) << (n - 1); - return x < limit; + VIXL_ASSERT((0 < n) && (n <= 64)); + return x <= static_cast<uint64_t>(INT64_MAX >> (64 - n)); } inline bool IsIntN(unsigned n, int64_t x) { - VIXL_ASSERT((0 < n) && (n < 64)); + VIXL_ASSERT((0 < n) && (n <= 64)); + if (n == 64) return true; int64_t limit = INT64_C(1) << (n - 1); return (-limit <= x) && (x < limit); } @@ -104,7 +109,8 @@ VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) { } inline bool IsUintN(unsigned n, uint32_t x) { - VIXL_ASSERT((0 < n) && (n < 32)); + VIXL_ASSERT((0 < n) && (n <= 32)); + if (n >= 32) return true; return !(x >> n); } inline bool IsUintN(unsigned n, int32_t x) { @@ -113,7 +119,8 @@ inline bool IsUintN(unsigned n, int32_t x) { return !(static_cast<uint32_t>(x) >> n); } inline bool IsUintN(unsigned n, uint64_t x) { - VIXL_ASSERT((0 < n) && (n < 64)); + VIXL_ASSERT((0 < n) && (n <= 64)); + if (n >= 64) return true; return !(x >> n); } inline bool IsUintN(unsigned n, int64_t x) { @@ -189,7 +196,7 @@ inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) { } -inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint32_t x) { +inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint64_t x) { VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) && (msb >= lsb)); return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x)); @@ -209,8 +216,7 @@ inline int64_t ExtractSignedBitfield64(int msb, int lsb, uint64_t x) { return result; } - -inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint32_t x) { +inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint64_t x) { VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) && (msb >= lsb)); uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x)); @@ -219,7 +225,6 @@ inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint32_t x) { return result; } - inline uint64_t RotateRight(uint64_t value, unsigned int rotate, unsigned int width) { @@ -277,6 +282,19 @@ VIXL_DEPRECATED("RawbitsToDouble", return RawbitsToDouble(bits); } +// Convert unsigned to signed numbers in a well-defined way (using two's +// complement representations). +inline int64_t RawbitsToInt64(uint64_t bits) { + return (bits >= UINT64_C(0x8000000000000000)) + ? (-static_cast<int64_t>(-bits - 1) - 1) + : static_cast<int64_t>(bits); +} + +inline int32_t RawbitsToInt32(uint32_t bits) { + return (bits >= UINT64_C(0x80000000)) ? 
@@ -277,6 +282,19 @@ VIXL_DEPRECATED("RawbitsToDouble",
   return RawbitsToDouble(bits);
 }
 
+// Convert unsigned to signed numbers in a well-defined way (using two's
+// complement representations).
+inline int64_t RawbitsToInt64(uint64_t bits) {
+  return (bits >= UINT64_C(0x8000000000000000))
+             ? (-static_cast<int64_t>(-bits - 1) - 1)
+             : static_cast<int64_t>(bits);
+}
+
+inline int32_t RawbitsToInt32(uint32_t bits) {
+  return (bits >= UINT64_C(0x80000000)) ? (-static_cast<int32_t>(-bits - 1) - 1)
+                                        : static_cast<int32_t>(bits);
+}
+
 namespace internal {
 
 // Internal simulation class used solely by the simulator to
@@ -371,6 +389,10 @@ VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {
 
 bool IsZero(Float16 value);
 
+inline bool IsPositiveZero(double value) {
+  return (value == 0.0) && (copysign(1.0, value) > 0.0);
+}
+
 inline bool IsNaN(float value) { return std::isnan(value); }
 
 inline bool IsNaN(double value) { return std::isnan(value); }
@@ -490,11 +512,11 @@ T ReverseBits(T value) {
 
 
 template <typename T>
-inline T SignExtend(T val, int bitSize) {
-  VIXL_ASSERT(bitSize > 0);
-  T mask = (T(2) << (bitSize - 1)) - T(1);
+inline T SignExtend(T val, int size_in_bits) {
+  VIXL_ASSERT(size_in_bits > 0);
+  T mask = (T(2) << (size_in_bits - 1)) - T(1);
   val &= mask;
-  T sign_bits = -((val >> (bitSize - 1)) << bitSize);
+  T sign_bits = -((val >> (size_in_bits - 1)) << size_in_bits);
   val |= sign_bits;
   return val;
 }
@@ -576,7 +598,7 @@ T AlignUp(T pointer,
   // reinterpret_cast behaviour for other types.
 
   typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
-      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+      (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer;
   VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
 
   size_t mask = alignment - 1;
@@ -596,7 +618,7 @@ T AlignDown(T pointer,
   // reinterpret_cast behaviour for other types.
 
   typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
-      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+      (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer;
   VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
 
   size_t mask = alignment - 1;
@@ -980,6 +1002,42 @@ Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {}
 
 Int64 BitCount(Uint32 value);
 
+// The algorithm used is adapted from the one described in section 8.2 of
+// Hacker's Delight, by Henry S. Warren, Jr.
+template <unsigned N, typename T>
+int64_t MultiplyHigh(T u, T v) {
+  uint64_t u0, v0, w0, u1, v1, w1, w2, t;
+  VIXL_STATIC_ASSERT((N == 8) || (N == 16) || (N == 32) || (N == 64));
+  uint64_t sign_mask = UINT64_C(1) << (N - 1);
+  uint64_t sign_ext = 0;
+  unsigned half_bits = N / 2;
+  uint64_t half_mask = GetUintMask(half_bits);
+  if (std::numeric_limits<T>::is_signed) {
+    sign_ext = UINT64_C(0xffffffffffffffff) << half_bits;
+  }
+
+  VIXL_ASSERT(sizeof(u) == sizeof(uint64_t));
+  VIXL_ASSERT(sizeof(u) == sizeof(u0));
+
+  u0 = u & half_mask;
+  u1 = u >> half_bits | (((u & sign_mask) != 0) ? sign_ext : 0);
+  v0 = v & half_mask;
+  v1 = v >> half_bits | (((v & sign_mask) != 0) ? sign_ext : 0);
+
+  w0 = u0 * v0;
+  t = u1 * v0 + (w0 >> half_bits);
+
+  w1 = t & half_mask;
+  w2 = t >> half_bits | (((t & sign_mask) != 0) ? sign_ext : 0);
+  w1 = u0 * v1 + w1;
+  w1 = w1 >> half_bits | (((w1 & sign_mask) != 0) ? sign_ext : 0);
+
+  uint64_t value = u1 * v1 + w2 + w1;
+  int64_t result;
+  memcpy(&result, &value, sizeof(result));
+  return result;
+}
+
 }  // namespace internal
 
 // The default NaN values (for FPCR.DN=1).
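Note: MultiplyHigh returns the high N bits of the 2N-bit product, the quantity that high-multiply instructions such as SMULH compute, by splitting each operand into halves so no type wider than uint64_t is needed. For N == 32 the result can be cross-checked against a plain widening multiply; a small test sketch, not part of the patch:

#include <cassert>
#include <cstdint>

// Reference for the N == 32 signed case: widen, multiply, keep the top half.
int32_t MultiplyHigh32Reference(int32_t u, int32_t v) {
  int64_t product = static_cast<int64_t>(u) * static_cast<int64_t>(v);
  return static_cast<int32_t>(product >> 32);
}

int main() {
  assert(MultiplyHigh32Reference(INT32_MAX, INT32_MAX) == 0x3fffffff);
  assert(MultiplyHigh32Reference(INT32_MIN, INT32_MIN) == 0x40000000);
  assert(MultiplyHigh32Reference(-1, 1) == -1);  // High half of -1 is all ones.
  return 0;
}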
@@ -1244,9 +1302,8 @@ inline Float16 FPRoundToFloat16(int64_t sign,
                                 uint64_t mantissa,
                                 FPRounding round_mode) {
   return RawbitsToFloat16(
-      FPRound<uint16_t,
-              kFloat16ExponentBits,
-              kFloat16MantissaBits>(sign, exponent, mantissa, round_mode));
+      FPRound<uint16_t, kFloat16ExponentBits, kFloat16MantissaBits>(
+          sign, exponent, mantissa, round_mode));
 }
 
 
@@ -1282,6 +1339,62 @@ Float16 FPToFloat16(double value,
                     FPRounding round_mode,
                     UseDefaultNaN DN,
                     bool* exception = NULL);
+
+// Like static_cast<T>(value), but with specialisations for the Float16 type.
+template <typename T, typename F>
+T StaticCastFPTo(F value) {
+  return static_cast<T>(value);
+}
+
+template <>
+inline float StaticCastFPTo<float, Float16>(Float16 value) {
+  return FPToFloat(value, kIgnoreDefaultNaN);
+}
+
+template <>
+inline double StaticCastFPTo<double, Float16>(Float16 value) {
+  return FPToDouble(value, kIgnoreDefaultNaN);
+}
+
+template <>
+inline Float16 StaticCastFPTo<Float16, float>(float value) {
+  return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
+}
+
+template <>
+inline Float16 StaticCastFPTo<Float16, double>(double value) {
+  return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
+}
+
+template <typename T>
+uint64_t FPToRawbitsWithSize(unsigned size_in_bits, T value) {
+  switch (size_in_bits) {
+    case 16:
+      return Float16ToRawbits(StaticCastFPTo<Float16>(value));
+    case 32:
+      return FloatToRawbits(StaticCastFPTo<float>(value));
+    case 64:
+      return DoubleToRawbits(StaticCastFPTo<double>(value));
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
+template <typename T>
+T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) {
+  VIXL_ASSERT(IsUintN(size_in_bits, value));
+  switch (size_in_bits) {
+    case 16:
+      return StaticCastFPTo<T>(RawbitsToFloat16(static_cast<uint16_t>(value)));
+    case 32:
+      return StaticCastFPTo<T>(RawbitsToFloat(static_cast<uint32_t>(value)));
+    case 64:
+      return StaticCastFPTo<T>(RawbitsToDouble(value));
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
 }  // namespace vixl
 
 #endif  // VIXL_UTILS_H
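Note: FPToRawbitsWithSize and RawbitsWithSizeToFP give callers one uint64_t-shaped channel for FP values of any lane size, with StaticCastFPTo covering the Float16 conversions that a plain static_cast cannot perform. A hedged usage sketch; the include path is an assumption, everything else is declared in the hunk above. Note also that RawbitsWithSizeToFP's assertion relies on the relaxed IsUintN accepting n == 64.

#include <cassert>
#include <cstdint>
#include "utils-vixl.h"  // Assumed path to the helpers in this patch.

int main() {
  // Round-trip a double through its raw 64-bit IEEE 754 encoding.
  uint64_t raw = vixl::FPToRawbitsWithSize(64, 1.5);
  assert(raw == UINT64_C(0x3ff8000000000000));  // Encoding of 1.5.
  assert(vixl::RawbitsWithSizeToFP<double>(64, raw) == 1.5);

  // Narrow a float to half precision and read it back as a double; 1.5 is
  // exactly representable in 16 bits, so the value survives the trip.
  uint64_t half = vixl::FPToRawbitsWithSize(16, 1.5f);
  assert(vixl::RawbitsWithSizeToFP<double>(16, half) == 1.5);
  return 0;
}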